shhh <- suppressPackageStartupMessages # It's a library, so shhh!
shhh(library( mgcv ))
shhh(library(dplyr))
shhh(library(ggplot2))
shhh(library(lme4))
shhh(library(tidymv))
shhh(library(gamlss))
shhh(library(gsubfn))
shhh(library(lmerTest))
shhh(library(tidyverse))
shhh(library(boot))
shhh(library(rsample))
shhh(library(plotrix))
shhh(library(ggrepel))
shhh(library(mgcv))
shhh(library(brms))
shhh(library(bayesplot))
shhh(library(patchwork))
shhh(library(MASS))
shhh(library(tidyr))
shhh(library(extraDistr))
shhh(library(purrr))
# For exercises with Stan code
shhh(library(rstan))
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = FALSE)
shhh(library(car))
shhh(library(coda))
shhh(library(gridExtra))
theme_set(theme_bw())
options(digits=4)
options(scipen=999)
set.seed(444)
pipe_message = function(.data, status) {message(status); .data}
Read in MoTR Data
rate = 160
file_prefix = "../data/provo_f160/"
fnames = list.files(path=file_prefix)
df = data.frame()
for (f in fnames) {
temp = read.csv(paste0(file_prefix, "/", f)) %>%
mutate(subj = str_remove(f, "_reading_measures.csv"))
df = rbind(df, temp)
}
# Filter out readers whose accuracy to the comprehension questions were less than 80%.
filter_df = df %>%
group_by(para_nr, subj) %>% summarise(correct = if_else(unique(correctness) == 1, 1, 0)) %>% ungroup() %>%
drop_na() %>%
group_by(subj) %>% summarise(p_correct = mean(correct)) %>% ungroup() %>%
mutate(p_correct = round(p_correct, digits = 2))
`summarise()` has grouped output by 'para_nr'. You can override using the `.groups` argument.
filter_df = filter_df %>% filter(p_correct < 0.8)
filter_list = filter_df$subj
pilot_exceptions <- c("reader_255", "reader_256", "reader_259", "reader_261", "reader_262", "reader_263")
raw_df = df %>%
filter(! subj %in% c(filter_list) | (subj %in% pilot_exceptions)) %>%
mutate(word = str_trim(word)) %>%
mutate(subj = str_remove(subj, "reader_")) %>%
mutate(subj = as.character(subj)) %>%
mutate(FPReg = if_else(total_duration == 0, -1, FPReg)) %>% #If the word is skipped we can't say that it wasn't regressed on the first pass. Set to a "NA"
mutate(skip = if_else(FPFix == 1, 0, 1)) %>% # use the same defination as in provo paper
dplyr::select(subj, expr_id, cond_id, para_nr, word, word_nr, first_duration, total_duration, gaze_duration, go_past_time, FPReg, skip) %>%
gather(metric, value, 7:10) %>%
group_by(para_nr, subj, metric, cond_id, expr_id) %>%
mutate(fixed = if_else(value > 0, 1, 0),
n_fixed = sum(fixed),
n_words = n()) %>%
ungroup() %>%
mutate(fix_threshold = n_fixed > (n_words / 5)) %>%
mutate(skimming = if_else(fix_threshold == F,T, F)) %>%
filter(skimming == F) %>%
spread(metric, value) %>%
dplyr::select(-fixed, -n_fixed, -n_words, -fix_threshold, -skimming)
length(unique(raw_df$subj))
[1] 98
# View(raw_df)
df %>%
filter(! subj %in% c(filter_list) | (subj %in% pilot_exceptions)) %>%
filter(FPReg >= 0) %>%
dplyr::select(FPReg) %>%
drop_na() %>%
summarise( m = mean(FPReg))
df %>%
filter(! subj %in% c(filter_list) | (subj %in% pilot_exceptions)) %>%
dplyr::select(FPFix) %>%
drop_na() %>%
summarise( m = mean(FPFix))
NA
NA
View(raw_df)
# Average across subjects
motr_agg_df = raw_df %>%
gather(metric, value, 7:12) %>%
filter(value >= 0) %>% #Removes the "NA" values for FPReg
# ==== Remove skipped words
mutate(zero = if_else(!metric %in% c("FPReg", "skip") & value == 0,T, F)) %>%
filter(zero == F) %>%
drop_na() %>%
group_by(para_nr, word_nr, word, metric) %>%
# === Remove outliers > 3SD
# mutate(outlier = if_else(metric != "FPReg" & value > (mean(value) + 3 * sd(value)), T, F)) %>% filter(outlier == F) %>%
summarise(value = mean(value), nsubj = length(unique(subj))) %>%
ungroup() %>%
arrange(para_nr, word_nr) %>%
rename(text_id = para_nr, word_text_idx = word_nr, motr_value = value)
`summarise()` has grouped output by 'para_nr', 'word_nr', 'word'. You can override using the `.groups` argument.
# View(motr_agg_df)
Comparison to Provo
# Read in Provo surprisal, frequency and length data
provo_modeling_df = read.csv("../data/provo_stats.csv") %>%
dplyr::select(text_id, sent_id, trigger_idx, word, freq, surp, len) %>%
rename(word_idx = trigger_idx)
provo_modeling_df
NA
# Read in Provo eyetracking data
provo_raw_df = read.csv("../data/provo_eyetracking.csv")
# unique(provo_raw_df$Participant_ID)
# length(unique(provo_raw_df$Participant_ID))
provo_eyetracking_df = provo_raw_df %>%
dplyr::select(Participant_ID, Text_ID, Sentence_Number, Word_In_Sentence_Number, Word, Word_Number, IA_FIRST_FIX_PROGRESSIVE, IA_FIRST_RUN_DWELL_TIME, IA_DWELL_TIME, IA_REGRESSION_PATH_DURATION, IA_REGRESSION_OUT, IA_SKIP) %>%
rename( #first_duration = IA_FIRST_FIXATION_DURATION,
gaze_duration = IA_FIRST_RUN_DWELL_TIME,
total_duration = IA_DWELL_TIME,
go_past_time = IA_REGRESSION_PATH_DURATION,
subj = Participant_ID,
text_id = Text_ID,
sent_id = Sentence_Number,
word_idx = Word_In_Sentence_Number,
word_text_idx = Word_Number, # IA_ID?
word = Word, # Word?
FPReg = IA_REGRESSION_OUT,
skip = IA_SKIP,
ff_progressive = IA_FIRST_FIX_PROGRESSIVE) %>%
mutate(first_duration = gaze_duration) %>%
mutate(gaze_duration = if_else(ff_progressive == 0, 0, as.double(gaze_duration)),
go_past_time = if_else(ff_progressive == 0, 0, as.double(go_past_time))) %>%
dplyr::select(-ff_progressive) %>%
mutate(
gaze_duration = if_else(total_duration == 0, 0, as.double(gaze_duration)),
go_past_time = if_else(total_duration == 0, 0, as.double(go_past_time)),
FPReg = if_else(total_duration == 0, -1, as.double(FPReg)),
first_duration = if_else(total_duration == 0, 0, as.double(first_duration)),
) %>%
gather(metric, value, 7:12) %>%
filter(value >= 0) %>% # filter skipped word in eye tracking data for FPReg
# ==== Remove skipped words
mutate(zero = if_else(!metric %in% c("FPReg", "skip") & value == 0,T, F)) %>%
filter(zero == F) %>%
# mutate(value = if_else(is.na(value), as.integer(0), as.integer(value))) %>%
# mutate(value = if_else(metric != "FPReg" & is.na(value), as.integer(0), as.integer(value))) %>%
drop_na() %>%
mutate(word = str_trim(word)) %>%
mutate(subj = str_remove(subj, "Sub")) %>%
mutate(subj = as.integer(subj)) %>%
group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
# === Remove outliers > 3SD
# mutate(outlier = if_else(! metric %in% c("FPReg", "skip") & value > (mean(value) + 3 * sd(value) ), T, F)) %>%
# filter(outlier == F) %>%
ungroup() #%>%
# Aggregate cross-participant data for all subjects
provo_eyetracking_agg_df = provo_eyetracking_df %>%
group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
summarise(value = mean(value),
nsubj = length(unique(subj))) %>%
ungroup()
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
provo_raw_df %>%
dplyr::select(IA_REGRESSION_OUT) %>%
drop_na() %>%
summarise( m = mean(IA_REGRESSION_OUT))
provo_raw_df %>%
dplyr::select(IA_SKIP) %>%
drop_na() %>%
summarise( m = mean(IA_SKIP))
NA
# Split the eyetracking data in two by subjects to see how well it correlates with itself
provo_eyetracking_subj1_df_temp = provo_eyetracking_df %>%
filter(subj <= 42) %>%
mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
summarise(value = mean(value)) %>%
ungroup() %>%
rename(value_1 = value) #%>%
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
# dplyr::select(-sent_id, -word_idx)
provo_eyetracking_subj1_df = merge(provo_eyetracking_subj1_df_temp, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
arrange(text_id, sent_id, word_idx) %>%
filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
rename(word = word.y) %>%
dplyr::select(text_id, word_text_idx, metric, word, value_1)
# View(provo_eyetracking_subj1_df)
provo_eyetracking_subj2_df = provo_eyetracking_df %>%
filter(subj > 42) %>%
mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
summarise(value = mean(value)) %>%
ungroup() %>%
rename(value_2 = value)%>%
dplyr::select(-sent_id, -word_idx)
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
# View(provo_eyetracking_subj2_df)
provo_eyetr_grouped_df = merge(provo_eyetracking_subj2_df, provo_eyetracking_subj1_df, by=c("text_id", "word_text_idx", "metric")) %>%
# filter(word.x == word.y) %>%
dplyr::select(-word.y) %>%
# === Remove outliers > 3SD
# group_by(metric) %>%
# mutate(motr_outlier = if_else(! metric %in% c("FPReg", "skip") & value_1 > (mean(value_1) + 3 * sd(value_1) ), T, F)) %>%
# filter(motr_outlier == F) %>%
# mutate(eyetr_outlier = if_else(! metric %in% c("FPReg", "skip") & value_2 > (mean(value_2) + 3 * sd(value_2) ), T, F)) %>%
# filter(eyetr_outlier == F) %>%
# ungroup() %>%
gather(measure, value, c("value_1", "value_2")) #%>%
# dplyr::select(-motr_outlier, -eyetr_outlier)
# View(provo_eyetr_grouped_df)
provo_df = merge(provo_eyetracking_agg_df, provo_modeling_df, by=c("text_id", "sent_id", "word_idx")) %>%
mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
arrange(text_id, sent_id, word_idx) %>%
rename(eyetr_value = value)
provo_df = merge(provo_df, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
arrange(text_id, sent_id, word_idx) %>%
# almost all the word.x != word.y is because of normalization problem, so we can keep them, instead, deleting some special cases
filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
# filter(word.x == word) #%>%
dplyr::select(-word.x, -word.y) %>%
# === Remove outliers > 3SD
# group_by(metric) %>%
# mutate(motr_outlier = if_else(! metric %in% c("FPReg", "skip") & motr_value > (mean(motr_value) + 3 * sd(motr_value) ), T, F)) %>%
# filter(motr_outlier == F) %>%
# mutate(eyetr_outlier = if_else(! metric %in% c("FPReg", "skip") & eyetr_value > (mean(eyetr_value) + 3 * sd(eyetr_value) ), T, F)) %>%
# filter(eyetr_outlier == F) %>%
# ungroup() %>%
gather(measure, value, c("eyetr_value", "motr_value")) #%>%
# dplyr::select(-motr_outlier, -eyetr_outlier)
# provo_df
Bayesian – use Stan – motr & eyetr correlation
print("Gaze Duration")
[1] "Gaze Duration"
gd_df = provo_df %>% filter(metric == "gaze_duration") %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value = pmax(eyetr_value, 1),
motr_value = pmax(motr_value, 1)
) %>%
mutate(eyetr_value_log = log(eyetr_value),
motr_value_log = log(motr_value))
print(cor.test(gd_df$eyetr_value, gd_df$motr_value)$estimate)
cor
0.4944
print(cor.test(gd_df$eyetr_value_log, gd_df$motr_value_log)$estimate)
cor
0.5015
# View(gd_df)
gd_df %>%
gather(measure, value, 12:15) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")

NA
# center data around 0.
gd_temp <- gd_df[c("eyetr_value", "motr_value")] %>%
# mutate(eyetr_value = eyetr_value - mean(eyetr_value),
# motr_value = motr_value - mean(motr_value)) %>%
data.matrix()
gd_temp_log <- gd_df[c("eyetr_value_log", "motr_value_log")] %>%
mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log),
motr_value_log = motr_value_log - mean(motr_value_log)) %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(gd_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")
# Plot the second data matrix gd_temp_log
plot(gd_temp_log, pch = 16, col = "red",
main = "Centered Log-Transformed")

gd_data = list(x=gd_temp, N=nrow(gd_temp))
fit_gd = stan(
file="stan_models/bivariate_correlation.stan",
data=gd_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_gd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gd, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds"))
print("Go Past Time")
[1] "Go Past Time"
gpt_df = provo_df %>% filter(metric == "go_past_time") %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value = pmax(eyetr_value, 1),
motr_value = pmax(motr_value, 1)
) %>%
mutate(eyetr_value_log = log(eyetr_value),
motr_value_log = log(motr_value))
print(cor.test(gpt_df$eyetr_value, gpt_df$motr_value)$estimate)
cor
0.5122
print(cor.test(gpt_df$eyetr_value_log, gpt_df$motr_value_log)$estimate)
cor
0.4607
gpt_df %>%
gather(measure, value, 12:15) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
gpt_temp <- gpt_df[c("eyetr_value", "motr_value")] %>% data.matrix()
gpt_temp_log <- gpt_df[c("eyetr_value_log", "motr_value_log")] %>%
mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log),
motr_value_log = motr_value_log - mean(motr_value_log)) %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gpt_temp
plot(gpt_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")
# Plot the second data matrix gpt_temp_log
plot(gpt_temp_log, pch = 16, col = "red",
main = "Centered Log-Transformed")

# -------fit model go past time ----------
gpt_data = list(x=gpt_temp, N=nrow(gpt_temp))
fit_gpt = stan(
file="stan_models/bivariate_correlation.stan",
data=gpt_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_gpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gpt, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds"))
print("Total Duration")
[1] "Total Duration"
td_df = provo_df %>% filter(metric == "total_duration") %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value = pmax(eyetr_value, 1),
motr_value = pmax(motr_value, 1)
) %>%
mutate(eyetr_value_log = log(eyetr_value),
motr_value_log = log(motr_value))
print(cor.test(td_df$eyetr_value, td_df$motr_value)$estimate)
cor
0.5952
print(cor.test(td_df$eyetr_value_log, td_df$motr_value_log)$estimate)
cor
0.5957
td_df %>%
gather(measure, value, 12:15) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
td_temp <- td_df[c("eyetr_value", "motr_value")] %>% data.matrix()
td_temp_log <- td_df[c("eyetr_value_log", "motr_value_log")] %>%
mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log),
motr_value_log = motr_value_log - mean(motr_value_log)) %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix td_temp
plot(td_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")
# Plot the second data matrix td_temp_log
plot(td_temp_log, pch = 16, col = "red",
main = "Centered Log-Transformed")

# -------fit model total duration ----------
td_data = list(x=td_temp, N=nrow(td_temp))
fit_td = stan(
file="stan_models/bivariate_correlation.stan",
data=td_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_td@stanmodel@dso <- new("cxxdso")
saveRDS(fit_td, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds"))
print("First Pass Regression Prob.")
[1] "First Pass Regression Prob."
reg_df = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5))
print(cor.test(reg_df$eyetr_value, reg_df$motr_value)$estimate)
cor
0.386
# View(reg_df)
reg_df %>%
gather(measure, value, 12:13) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
reg_temp <- reg_df[c("eyetr_value", "motr_value")] %>% data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix td_temp
plot(reg_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")

# -------fit model FPReg ----------
reg_data = list(x=reg_temp, N=nrow(reg_temp))
fit_reg = stan(
file="stan_models/bivariate_normal_reg.stan",
data=reg_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_drop0s.rds"))
rank transformation for FPReg
reg_df = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
mutate(
eyetr_value_rank = ifelse(eyetr_value > 0, rank(eyetr_value), NA),
motr_value_rank = ifelse(motr_value > 0, rank(motr_value), NA)
) %>%
drop_na()
# mutate(
# eyetr_value_rank = rank(eyetr_value, ties.method = "average"),
# motr_value_rank = rank(motr_value, ties.method = "average")
# )
# View(reg_df)
print(cor.test(reg_df$eyetr_value_rank, reg_df$motr_value_rank)$estimate)
print(cor.test(reg_df$eyetr_value_rank, reg_df$motr_value_rank, method = "kendall"))
reg_df %>%
gather(measure, value, 14:15) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
reg_temp <- reg_df[c("eyetr_value_rank", "motr_value_rank")] %>% data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix td_temp
plot(reg_temp, pch = 16, col = "blue",
main = "Rank Transformed")
# -------fit model FPReg RANK----------
reg_data = list(x=reg_temp, N=nrow(reg_temp))
fit_reg = stan(
file="stan_models/bivariate_correlation_rank.stan",
data=reg_data,
iter=4000,
chains=4,
cores=4,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_rank_drop0s.rds"))
print("skip Prob.")
[1] "skip Prob."
skip_df = provo_df %>% filter(metric == "skip") %>%
spread(measure, value) %>%
filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5))
print(cor.test(skip_df$eyetr_value, skip_df$motr_value)$estimate)
cor
0.7945
# View(reg_df)
skip_df %>%
gather(measure, value, 12:13) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
skip_temp <- skip_df[c("eyetr_value", "motr_value")] %>% data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix td_temp
plot(skip_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")

# -------fit model skip ----------
skip_data = list(x=skip_temp, N=nrow(skip_temp))
fit_skip = stan(
file="stan_models/bivariate_normal_reg.stan",
data=skip_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_skip@stanmodel@dso <- new("cxxdso")
saveRDS(fit_skip, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_skip_cor_drop0s.rds"))
fit_gd = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds")
fit_gpt = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds")
fit_td = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds")
fit_reg = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_drop0s.rds")
fit_reg_rank = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_rank_drop0s.rds")
fit_skip = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_skip_cor_drop0s.rds")
# models for drop 0s
# fit_gd = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds")
# fit_gpt = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds")
# fit_td = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds")
# fit_reg = readRDS("./bayesian_models/bayesian_models_correlation/ranked_motr_eyetr_FPReg_cor.rds")
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
print(fit_gd)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 234.30 0.01 0.90 232.53 233.69 234.29 234.91 236.06 6945 1
mu[2] 371.77 0.02 1.74 368.30 370.63 371.77 372.91 375.24 6640 1
sigma[1] 37.80 0.01 0.81 36.24 37.26 37.79 38.34 39.42 5283 1
sigma[2] 71.83 0.02 1.46 69.03 70.82 71.83 72.82 74.71 5227 1
nu 4.46 0.00 0.30 3.92 4.25 4.45 4.65 5.08 5469 1
rho 0.51 0.00 0.02 0.48 0.50 0.51 0.52 0.54 6947 1
cov[1,1] 1429.34 0.84 61.16 1313.63 1388.16 1427.81 1469.75 1553.95 5276 1
cov[1,2] 1391.14 1.15 78.47 1239.24 1339.15 1390.35 1441.79 1551.58 4667 1
cov[2,1] 1391.14 1.15 78.47 1239.24 1339.15 1390.35 1441.79 1551.58 4667 1
cov[2,2] 5161.37 2.91 210.40 4765.32 5015.23 5159.80 5302.57 5580.95 5231 1
x_rand[1] 235.01 0.55 49.34 138.86 206.52 234.61 262.42 336.83 8089 1
x_rand[2] 373.62 1.07 95.15 182.83 319.23 372.21 425.30 567.12 7980 1
attempt 0.00 0.00 0.05 0.00 0.00 0.00 0.00 0.00 8050 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -25082.82 0.03 1.80 -25087.41 -25083.73 -25082.46 -25081.52 -25080.40 3875 1
Samples were drawn using NUTS(diag_e) at Tue Feb 6 00:04:08 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
print(fit_gpt)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 306.99 0.02 1.71 303.61 305.9 306.97 308.14 310.35 8255 1
mu[2] 425.10 0.03 2.96 419.39 423.0 425.10 427.11 430.79 8294 1
sigma[1] 67.11 0.02 1.47 64.27 66.1 67.10 68.10 70.03 7296 1
sigma[2] 116.24 0.03 2.64 111.16 114.5 116.21 118.01 121.46 7232 1
nu 2.26 0.00 0.09 2.08 2.2 2.26 2.32 2.45 7418 1
rho 0.42 0.00 0.02 0.38 0.4 0.42 0.43 0.45 8786 1
cov[1,1] 4505.28 2.31 197.26 4130.64 4369.6 4501.91 4637.37 4904.27 7307 1
cov[1,2] 3248.81 2.51 213.71 2842.87 3101.4 3244.96 3391.94 3668.78 7277 1
cov[2,1] 3248.81 2.51 213.71 2842.87 3101.4 3244.96 3391.94 3668.78 7277 1
cov[2,2] 13517.55 7.22 613.53 12357.23 13097.8 13505.59 13925.78 14752.13 7226 1
x_rand[1] 318.77 1.49 134.42 121.89 258.4 309.38 363.21 564.06 8092 1
x_rand[2] 451.19 3.26 294.27 111.69 344.8 430.62 522.20 876.86 8131 1
attempt 0.04 0.00 0.21 0.00 0.0 0.00 0.00 1.00 8093 1
max_attempts 10.00 NaN 0.00 10.00 10.0 10.00 10.00 10.00 NaN NaN
lp__ -29010.67 0.03 1.76 -29015.10 -29011.6 -29010.35 -29009.40 -29008.25 3994 1
Samples were drawn using NUTS(diag_e) at Tue Feb 6 00:05:58 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
print(fit_td)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 266.87 0.02 1.28 264.39 266.01 266.86 267.75 269.33 5448 1
mu[2] 420.80 0.03 2.46 416.02 419.15 420.76 422.47 425.53 5573 1
sigma[1] 52.62 0.02 1.10 50.47 51.88 52.61 53.36 54.78 4500 1
sigma[2] 100.49 0.03 2.03 96.58 99.11 100.48 101.84 104.53 4579 1
nu 4.28 0.00 0.27 3.79 4.09 4.27 4.46 4.85 5361 1
rho 0.62 0.00 0.01 0.59 0.61 0.62 0.63 0.65 6752 1
cov[1,1] 2769.92 1.73 115.77 2547.56 2691.20 2768.16 2846.87 3000.64 4498 1
cov[1,2] 3278.82 2.57 164.70 2966.61 3164.38 3277.56 3386.73 3608.01 4100 1
cov[2,1] 3278.82 2.57 164.70 2966.61 3164.38 3277.56 3386.73 3608.01 4100 1
cov[2,2] 10102.99 6.04 408.74 9326.88 9822.79 10095.99 10371.05 10925.76 4584 1
x_rand[1] 268.11 0.79 69.98 127.37 228.67 268.19 306.65 407.71 7859 1
x_rand[2] 427.30 1.52 133.15 166.09 351.69 423.23 498.89 698.02 7666 1
attempt 0.01 0.00 0.09 0.00 0.00 0.00 0.00 0.00 8123 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -26591.96 0.03 1.74 -26596.19 -26592.83 -26591.63 -26590.69 -26589.56 3343 1
Samples were drawn using NUTS(diag_e) at Tue Feb 6 00:07:59 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.--------------------------------------------"
print(fit_reg)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.14 0.00 0.00 0.13 0.14 0.14 0.14 0.14 8022 1
mu[2] 0.09 0.00 0.00 0.08 0.08 0.09 0.09 0.09 8135 1
sigma[1] 0.08 0.00 0.00 0.08 0.08 0.08 0.08 0.09 7142 1
sigma[2] 0.05 0.00 0.00 0.04 0.05 0.05 0.05 0.05 7221 1
nu 2.78 0.00 0.18 2.46 2.65 2.77 2.89 3.14 7451 1
rho 0.21 0.00 0.03 0.16 0.19 0.21 0.23 0.27 9921 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 7143 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8463 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8463 1
cov[2,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7208 1
x_rand[1] 0.17 0.00 0.13 0.02 0.10 0.15 0.21 0.42 7885 1
x_rand[2] 0.10 0.00 0.07 0.01 0.06 0.09 0.12 0.25 7935 1
attempt 0.17 0.01 0.46 0.00 0.00 0.00 0.00 1.00 7711 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 4545.35 0.03 1.75 4541.07 4544.43 4545.68 4546.62 4547.74 3653 1
Samples were drawn using NUTS(diag_e) at Tue Feb 6 00:09:17 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob. RANK--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob. RANK--------------------------------------------"
print(fit_reg_rank)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 1374.90 0.18 18.98 1337.48 1362.16 1374.73 1387.9 1411.91 11231 1
mu[2] 1804.27 0.11 11.31 1782.23 1796.66 1804.43 1811.9 1826.26 10240 1
sigma[1] 696.99 0.12 12.87 672.39 688.36 696.94 705.4 723.03 10941 1
sigma[2] 418.96 0.08 7.84 403.87 413.54 418.83 424.2 434.92 10853 1
nu 102.55 0.23 23.98 63.02 85.64 100.14 116.5 157.44 10504 1
rho 0.19 0.00 0.03 0.14 0.17 0.19 0.2 0.24 11138 1
cov[1,1] 485954.71 172.11 17956.82 452113.94 473835.62 485723.58 497595.5 522770.76 10885 1
cov[1,2] 54509.75 75.79 7861.98 39206.85 49139.03 54379.02 59723.2 70306.93 10762 1
cov[2,1] 54509.75 75.79 7861.98 39206.85 49139.03 54379.02 59723.2 70306.93 10762 1
cov[2,2] 175589.65 63.18 6576.69 163113.47 171011.52 175420.06 179976.5 189159.61 10834 1
x_rand[1] 1420.17 7.48 659.95 217.73 942.20 1404.35 1864.0 2767.59 7778 1
x_rand[2] 1804.04 4.94 427.24 964.46 1521.76 1803.44 2090.0 2641.59 7493 1
attempt 0.03 0.00 0.18 0.00 0.00 0.00 0.0 1.00 8072 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.0 10.00 NaN NaN
lp__ -20329.53 0.03 1.75 -20333.76 -20330.49 -20329.20 -20328.2 -20327.15 4066 1
Samples were drawn using NUTS(diag_e) at Tue Feb 6 00:12:53 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- Skip Prob.--------------------------------------------')
[1] "---------------------------- Skip Prob.--------------------------------------------"
print(fit_skip)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.44 0.00 0.00 0.43 0.44 0.44 0.44 0.45 5294 1
mu[2] 0.46 0.00 0.01 0.45 0.45 0.46 0.46 0.47 5320 1
sigma[1] 0.22 0.00 0.00 0.22 0.22 0.22 0.23 0.23 4893 1
sigma[2] 0.25 0.00 0.00 0.24 0.25 0.25 0.25 0.25 5030 1
nu 100.97 0.26 23.54 62.68 84.28 98.17 115.01 154.51 7911 1
rho 0.80 0.00 0.01 0.78 0.79 0.80 0.80 0.81 5412 1
cov[1,1] 0.05 0.00 0.00 0.05 0.05 0.05 0.05 0.05 4891 1
cov[1,2] 0.04 0.00 0.00 0.04 0.04 0.04 0.05 0.05 4075 1
cov[2,1] 0.04 0.00 0.00 0.04 0.04 0.04 0.05 0.05 4075 1
cov[2,2] 0.06 0.00 0.00 0.06 0.06 0.06 0.06 0.06 5025 1
x_rand[1] 0.46 0.00 0.20 0.09 0.32 0.46 0.60 0.89 7796 1
x_rand[2] 0.48 0.00 0.23 0.08 0.32 0.47 0.63 0.96 7592 1
attempt 0.05 0.00 0.21 0.00 0.00 0.00 0.00 1.00 7949 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 4253.99 0.03 1.75 4249.74 4253.06 4254.32 4255.27 4256.38 3625 1
Samples were drawn using NUTS(diag_e) at Sun Feb 25 01:27:44 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# stan_trace(fit_gd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_gd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_gd, pars=c("rho", "mu", "sigma", "nu"))
# Gaze Duration
stan_trace(fit_gd)
stan_dens(fit_gd, separate_chains = TRUE)
stan_plot(fit_gd)
# Go Past Time
stan_trace(fit_gpt)
stan_dens(fit_gpt, separate_chains = TRUE)
stan_plot(fit_gpt)
# Total Duration
stan_trace(fit_td)
stan_dens(fit_td, separate_chains = TRUE)
stan_plot(fit_td)
# FPReg
stan_trace(fit_reg)
stan_dens(fit_reg, separate_chains = TRUE)
stan_plot(fit_reg)
p1 <- stan_trace(fit_gd, pars = 'rho', inc_warmup = FALSE)
p2 <- stan_dens(fit_gd, pars = 'rho', separate_chains = TRUE)
p3 <- stan_trace(fit_gd, pars = 'mu[1]', inc_warmup = FALSE)
p4 <- stan_dens(fit_gd, pars = 'mu[1]', separate_chains = TRUE)
p5 <- stan_trace(fit_gd, pars = 'mu[2]', inc_warmup = FALSE)
p6 <- stan_dens(fit_gd, pars = 'mu[2]', separate_chains = TRUE)
p7 <- stan_trace(fit_gd, pars = 'sigma[1]', inc_warmup = FALSE)
p8 <- stan_dens(fit_gd, pars = 'sigma[1]', separate_chains = TRUE)
p9 <- stan_trace(fit_gd, pars = 'sigma[2]', inc_warmup = FALSE)
p10 <- stan_dens(fit_gd, pars = 'sigma[2]', separate_chains = TRUE)
p11 <- stan_trace(fit_gd, pars = 'nu', inc_warmup = FALSE)
p12 <- stan_dens(fit_gd, pars = 'nu', separate_chains = TRUE)
# Use grid.arrange() to arrange the plots
# grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, ncol=2, nrow=6)
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
rho_gd = as.numeric(extract(fit_gd, "rho")[[1]])
mean = mean(rho_gd)
crI = quantile(rho_gd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_gd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.5121
HPD: [0.4784, 0.5441]
crI: [0.4789, 0.5448]
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
rho_gpt = as.numeric(extract(fit_gpt, "rho")[[1]])
mean = mean(rho_gpt)
crI = quantile(rho_gpt, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_gpt), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.4163
HPD: [0.3793, 0.455]
crI: [0.378, 0.4539]
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
rho_td = as.numeric(extract(fit_td, "rho")[[1]])
mean = mean(rho_td)
crI = quantile(rho_td, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_td), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.6197
HPD: [0.5915, 0.6471]
crI: [0.5912, 0.647]
print('---------------------------- First Pass Regression --------------------------------------------')
[1] "---------------------------- First Pass Regression --------------------------------------------"
# rho_reg = as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
rho_reg = as.numeric(extract(fit_reg, "rho")[[1]])
mean = mean(rho_reg)
crI = quantile(rho_reg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_reg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.2139
HPD: [0.1561, 0.2715]
crI: [0.1554, 0.2711]
print('---------------------------- First Pass Regression RANK--------------------------------------------')
[1] "---------------------------- First Pass Regression RANK--------------------------------------------"
# rho_reg = as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
rho_reg = as.numeric(extract(fit_reg_rank, "rho")[[1]])
mean = mean(rho_reg)
crI = quantile(rho_reg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_reg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
Mean: 0.1866
HPD: [0.1359, 0.2356]
crI: [0.1367, 0.2367]
print('---------------------------- Skip --------------------------------------------')
[1] "---------------------------- Skip --------------------------------------------"
# rho_reg = as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
rho_skip = as.numeric(extract(fit_skip, "rho")[[1]])
mean = mean(rho_skip)
crI = quantile(rho_skip, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_skip), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.7975
HPD: [0.7823, 0.8112]
crI: [0.7827, 0.8117]
print('---------------------------- Gaze Duration--------------------------------------------')
gd_rand <- extract(fit_gd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(gd_rand[,1], gd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(gd_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(gd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(gd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- Go Past Time--------------------------------------------')
gpt_rand <- extract(fit_gpt, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "Go Past Time") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(gpt_rand[,1], gpt_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(gpt_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(gpt_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(gpt_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- Total Duration--------------------------------------------')
td_rand <- extract(fit_td, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "Total Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(td_rand[,1], td_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(td_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(td_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(td_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression --------------------------------------------')
reg_rand <- extract(fit_reg, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(reg_rand[,1], reg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(reg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(reg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- Skip --------------------------------------------')
skip_rand <- extract(fit_skip, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "Skip") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(skip_rand[,1], skip_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(skip_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(skip_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(skip_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
model motr eyetr FPReg correlation (eyetr < 0.3)
print("First Pass Regression Prob. all and < 0.3")
[1] "First Pass Regression Prob. all and < 0.3"
reg_df_all = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
# filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5))
reg_df_low_drop0 = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5)) %>%
filter(eyetr_value < 0.3)
reg_df_low = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
# filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5)) %>%
filter(eyetr_value < 0.3)
# mutate(eyetr_value = exp(eyetr_value),
# motr_value = exp(motr_value)
# )
# View(reg_df)
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$estimate)
cor
0.3024
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$p.value)
[1] 0.000000000000000000000000000000000000000000000000000000915
print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$estimate)
cor
0.1071
print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$p.value)
[1] 0.000000334
print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$estimate)
cor
0.08719
print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$p.value)
[1] 0.002145
# View(reg_df)
reg_df_low %>%
gather(measure, value, 12:13) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
reg_temp_all <- reg_df_all[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low <- reg_df_low[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low_drop0 <- reg_df_low_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix td_temp
plot(reg_temp_low, pch = 16, col = "blue",
main = "Not Log-Transformed")

# -------fit model FPReg < 0.3 ----------
reg_data = list(x=reg_temp_all, N=nrow(reg_temp_all))
fit_reg = stan(
# file="stan_models/bivariate_beta_correlation_reg.stan",
file = "stan_models/bivariate_normal_reg.stan",
data=reg_data,
iter=4000,
chains=4,
cores=4,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_all_data_drop0s.rds"))
model motr eyetr FPReg correlation (eyetr >= 0.3)
print("First Pass Regression Prob. >= 0.3")
[1] "First Pass Regression Prob. >= 0.3"
reg_df_high_drop0 = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5)) %>%
filter(eyetr_value >= 0.3)
reg_df_high = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
# filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5)) %>%
filter(eyetr_value >= 0.3)
# mutate(eyetr_value = exp(eyetr_value),
# motr_value = exp(motr_value)
# )
# View(reg_df)
print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$estimate)
cor
0.4202
print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$p.value)
[1] 0.0000000000002819
print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$estimate)
cor
0.4904
print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$p.value)
[1] 0.0000000000012
# View(reg_df)
reg_df_high %>%
gather(measure, value, 12:13) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
reg_temp_high <- reg_df_high[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_high_drop0 <- reg_df_high_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 3))

# Plot the first data matrix td_temp
plot(reg_temp_all, pch = 16, col = "blue",
main = "FPReg not logged all data")
plot(reg_temp_low, pch = 16, col = "blue",
main = "FPReg not logged eyetr < 0.3 ")
plot(reg_temp_high, pch = 16, col = "blue",
main = "FPReg not logged eyetr >= 0.3")

# -------fit model FPReg >= 0.3 ----------
reg_data = list(x=reg_temp_high_drop0, N=nrow(reg_temp_high_drop0))
fit_reg = stan(
# file="stan_models/bivariate_beta_correlation_reg.stan",
file = "stan_models/bivariate_normal_reg.stan",
data=reg_data,
iter=4000,
chains=4,
cores=4,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0(".//bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_03-1_drop0s.rds"))
fit_mreg_all = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_all_data.rds")
fit_mreg_all_drop0 = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_all_data_drop0s.rds")
fit_mreg_low = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_00-03.rds")
fit_mreg_low_drop0 = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_00-03_drop0s.rds")
fit_mreg_high = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_03-1.rds")
fit_mreg_high_drop0 = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_03-1_drop0s.rds")
print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
[1] "---------------------------- First Pass Regression Prob. all data --------------------------------------------"
print(fit_mreg_all)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.12 0.00 0.00 0.12 0.12 0.12 0.13 0.13 5811 1
mu[2] 0.03 0.00 0.00 0.02 0.03 0.03 0.03 0.03 3384 1
sigma[1] 0.08 0.00 0.00 0.07 0.07 0.08 0.08 0.08 3694 1
sigma[2] 0.05 0.00 0.00 0.05 0.05 0.05 0.05 0.06 3040 1
nu 2.42 0.00 0.14 2.16 2.33 2.42 2.52 2.71 3250 1
rho 0.16 0.00 0.02 0.11 0.14 0.16 0.17 0.20 8176 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 3693 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6348 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6348 1
cov[2,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 3053 1
x_rand[1] 0.16 0.00 0.12 0.02 0.09 0.14 0.20 0.40 7811 1
x_rand[2] 0.07 0.00 0.08 0.00 0.03 0.05 0.09 0.25 7966 1
attempt 0.63 0.01 1.03 0.00 0.00 0.00 1.00 3.00 7919 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 7450.58 0.03 1.78 7446.22 7449.65 7450.92 7451.89 7452.98 3519 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 23:08:09 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob. all data no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob. all data no 0s--------------------------------------------"
print(fit_mreg_all_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.15 0.00 0.00 0.14 0.15 0.15 0.15 0.16 8507 1
mu[2] 0.14 0.00 0.00 0.13 0.14 0.14 0.14 0.14 7923 1
sigma[1] 0.09 0.00 0.00 0.08 0.08 0.09 0.09 0.09 7186 1
sigma[2] 0.06 0.00 0.00 0.06 0.06 0.06 0.06 0.06 6227 1
nu 2.78 0.00 0.23 2.37 2.62 2.77 2.93 3.26 7008 1
rho 0.18 0.00 0.04 0.11 0.16 0.18 0.21 0.26 9833 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 7173 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8599 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8599 1
cov[2,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6234 1
x_rand[1] 0.18 0.00 0.13 0.02 0.10 0.16 0.22 0.43 7699 1
x_rand[2] 0.15 0.00 0.08 0.03 0.10 0.14 0.18 0.34 8175 1
attempt 0.16 0.00 0.43 0.00 0.00 0.00 0.00 1.00 7917 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 2566.44 0.03 1.75 2562.18 2565.53 2566.76 2567.72 2568.88 3487 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 23:18:11 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------"
print(fit_mreg_low)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.12 0.00 0.00 0.11 0.12 0.12 0.12 0.12 8566 1
mu[2] 0.03 0.00 0.00 0.03 0.03 0.03 0.04 0.04 4981 1
sigma[1] 0.06 0.00 0.00 0.06 0.06 0.06 0.06 0.07 7280 1
sigma[2] 0.06 0.00 0.00 0.06 0.06 0.06 0.06 0.07 4552 1
nu 5.54 0.01 0.51 4.62 5.19 5.51 5.86 6.62 4637 1
rho 0.10 0.00 0.02 0.06 0.09 0.10 0.12 0.15 8596 1
cov[1,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7299 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8259 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8259 1
cov[2,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4571 1
x_rand[1] 0.13 0.00 0.07 0.02 0.08 0.12 0.17 0.28 8277 1
x_rand[2] 0.07 0.00 0.06 0.00 0.03 0.06 0.10 0.21 7280 1
attempt 0.53 0.01 0.90 0.00 0.00 0.00 1.00 3.00 8006 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 7794.82 0.03 1.78 7790.50 7793.88 7795.16 7796.13 7797.26 3232 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 20:52:03 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.< 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.< 0.3 no 0s--------------------------------------------"
print(fit_mreg_low_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.13 0.00 0.00 0.13 0.13 0.13 0.13 0.14 10534 1
mu[2] 0.13 0.00 0.00 0.13 0.13 0.13 0.14 0.14 8839 1
sigma[1] 0.06 0.00 0.00 0.06 0.06 0.06 0.06 0.07 8534 1
sigma[2] 0.06 0.00 0.00 0.06 0.06 0.06 0.07 0.07 7122 1
nu 6.50 0.01 0.96 4.90 5.83 6.40 7.06 8.73 6790 1
rho 0.07 0.00 0.04 0.00 0.05 0.07 0.10 0.15 10356 1
cov[1,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8526 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 10435 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 10435 1
cov[2,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7123 1
x_rand[1] 0.14 0.00 0.07 0.02 0.09 0.13 0.18 0.28 8031 1
x_rand[2] 0.14 0.00 0.07 0.02 0.10 0.14 0.18 0.29 8006 1
attempt 0.08 0.00 0.30 0.00 0.00 0.00 0.00 1.00 7642 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 2737.21 0.03 1.76 2732.81 2736.27 2737.56 2738.50 2739.62 3992 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 20:30:02 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------"
print(fit_mreg_high)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.45 0.00 0.01 0.44 0.45 0.45 0.46 0.47 5045 1
mu[2] 0.15 0.00 0.01 0.13 0.15 0.15 0.16 0.18 4628 1
sigma[1] 0.11 0.00 0.01 0.10 0.11 0.11 0.12 0.12 5622 1
sigma[2] 0.15 0.00 0.01 0.13 0.15 0.15 0.16 0.17 4499 1
nu 25.71 0.18 12.83 9.59 16.53 22.89 31.70 58.03 5109 1
rho 0.41 0.00 0.06 0.30 0.37 0.41 0.45 0.51 7137 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 5625 1
cov[1,2] 0.01 0.00 0.00 0.00 0.01 0.01 0.01 0.01 4842 1
cov[2,1] 0.01 0.00 0.00 0.00 0.01 0.01 0.01 0.01 4842 1
cov[2,2] 0.02 0.00 0.00 0.02 0.02 0.02 0.03 0.03 4553 1
x_rand[1] 0.47 0.00 0.11 0.25 0.39 0.47 0.54 0.70 7964 1
x_rand[2] 0.20 0.00 0.13 0.01 0.10 0.19 0.28 0.49 7829 1
attempt 0.19 0.01 0.47 0.00 0.00 0.00 0.00 1.00 7881 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 579.26 0.03 1.75 575.10 578.35 579.56 580.56 581.68 3529 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 22:25:38 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.>= 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.>= 0.3 no 0s--------------------------------------------"
print(fit_mreg_high_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.48 0.00 0.01 0.46 0.48 0.48 0.49 0.51 6409 1
mu[2] 0.27 0.00 0.01 0.25 0.26 0.27 0.28 0.30 6397 1
sigma[1] 0.12 0.00 0.01 0.11 0.12 0.12 0.13 0.14 6555 1
sigma[2] 0.16 0.00 0.01 0.14 0.15 0.16 0.17 0.18 6281 1
nu 32.85 0.16 15.27 11.84 21.65 30.12 40.80 69.54 8953 1
rho 0.51 0.00 0.07 0.37 0.47 0.52 0.56 0.64 6986 1
cov[1,1] 0.02 0.00 0.00 0.01 0.01 0.02 0.02 0.02 6491 1
cov[1,2] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 5145 1
cov[2,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 5145 1
cov[2,2] 0.03 0.00 0.00 0.02 0.02 0.03 0.03 0.03 6242 1
x_rand[1] 0.49 0.00 0.13 0.25 0.41 0.49 0.57 0.74 8129 1
x_rand[2] 0.29 0.00 0.15 0.04 0.18 0.28 0.38 0.60 7467 1
attempt 0.06 0.00 0.24 0.00 0.00 0.00 0.00 1.00 7913 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 300.28 0.03 1.75 296.05 299.35 300.60 301.58 302.70 3555 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 22:31:08 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# # FPReg all data
# stan_trace(fit_mreg_all)
# stan_dens(fit_mreg_all, separate_chains = TRUE)
# stan_plot(fit_mreg_all)
# stan_trace(fit_mreg_all_drop0)
# stan_dens(fit_mreg_all_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_all_drop0)
# # FPReg < 0.3
# stan_trace(fit_mreg_low)
# stan_dens(fit_mreg_low, separate_chains = TRUE)
# stan_plot(fit_mreg_low)
#
# stan_trace(fit_mreg_low_drop0)
# stan_dens(fit_mreg_low_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_low_drop0)
# FPReg > 0.3
stan_trace(fit_mreg_high)
'pars' not specified. Showing first 10 parameters by default.

stan_dens(fit_mreg_high, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.

stan_plot(fit_mreg_high)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)

stan_trace(fit_mreg_high_drop0)
'pars' not specified. Showing first 10 parameters by default.

stan_dens(fit_mreg_high_drop0, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.

stan_plot(fit_mreg_high_drop0)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)

print('---------------------------- First Pass Regression all data--------------------------------------------')
[1] "---------------------------- First Pass Regression all data--------------------------------------------"
rho_mreg_all = as.numeric(extract(fit_mreg_all, "rho")[[1]])
mean = mean(rho_mreg_all)
crI = quantile(rho_mreg_all, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_all), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.1574
HPD: [0.1139, 0.2017]
crI: [0.1134, 0.2014]
print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression all data no 0s--------------------------------------------"
rho_mreg_all_drop0 = as.numeric(extract(fit_mreg_all_drop0, "rho")[[1]])
mean = mean(rho_mreg_all_drop0)
crI = quantile(rho_mreg_all_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_all_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.1843
HPD: [0.1106, 0.2573]
crI: [0.1114, 0.2581]
print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3--------------------------------------------"
rho_mreg_low = as.numeric(extract(fit_mreg_low, "rho")[[1]])
mean = mean(rho_mreg_low)
crI = quantile(rho_mreg_low, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_low), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.103
HPD: [0.05775, 0.1468]
crI: [0.05805, 0.1475]
print('---------------------------- First Pass Regression < 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3 no 0s--------------------------------------------"
rho_mreg_low_drop0 = as.numeric(extract(fit_mreg_low_drop0, "rho")[[1]])
mean = mean(rho_mreg_low_drop0)
crI = quantile(rho_mreg_low_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_low_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.07362
HPD: [-0.000616, 0.1514]
crI: [-0.003759, 0.1491]
print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3--------------------------------------------"
rho_mreg_high = as.numeric(extract(fit_mreg_high, "rho")[[1]])
mean = mean(rho_mreg_high)
crI = quantile(rho_mreg_high, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_high), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.4102
HPD: [0.3001, 0.5155]
crI: [0.2992, 0.5146]
print('---------------------------- First Pass Regression >= 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3 no 0s--------------------------------------------"
rho_mreg_high_drop0 = as.numeric(extract(fit_mreg_high_drop0, "rho")[[1]])
mean = mean(rho_mreg_high_drop0)
crI = quantile(rho_mreg_high_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_high_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.514
HPD: [0.3797, 0.6456]
crI: [0.3728, 0.6412]
print('---------------------------- First Pass Regression all data --------------------------------------------')
mallreg_rand <- extract(fit_mreg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mallreg_rand[,1], mallreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_all, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mallreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
mallreg_rand_drop0 <- extract(fit_mreg_all_drop0, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mallreg_rand_drop0[,1], mallreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_all, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mallreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
mlowreg_rand <- extract(fit_mreg_low, "x_rand")[[1]]
print(mlowreg_rand)
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mlowreg_rand[,1], mlowreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_low, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mlowreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlowreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression < 0.3 no 0s --------------------------------------------')
mlowreg_rand_drop0 <- extract(fit_mreg_low_drop0, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mlowreg_rand_drop0[,1], mlowreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_low_drop0, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mlowreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlowreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
mhighreg_rand_samples <- extract(fit_mreg_high, "x_rand")[[1]]
# print(mhighreg_rand_samples)
selected_indices <- sample(1:nrow(mhighreg_rand_samples), 900)
mhighreg_rand <- mhighreg_rand_samples[selected_indices, ]
# mhighreg_rand <- extract(fit_mreg_high, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mhighreg_rand[,1], mhighreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_high, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mhighreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mhighreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
# print('---------------------------- First Pass Regression >= 0.3 no 0s --------------------------------------------')
mhighreg_rand_drop0_samples <- extract(fit_mreg_high_drop0, "x_rand")[[1]]
selected_indices <- sample(1:nrow(mhighreg_rand_drop0_samples), 900)
mhighreg_rand_drop0 <- mhighreg_rand_drop0_samples[selected_indices, ]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mhighreg_rand_drop0[,1], mhighreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_high_drop0, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mhighreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mhighreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
model eye tracking to eye tracking correlation
print("Gaze Duration")
[1] "Gaze Duration"
# View(provo_eyetr_grouped_df)
egd_df = provo_eyetr_grouped_df %>% filter(metric == "gaze_duration") %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
filter(text_id != 18) %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1),
eyetr_value_2 = pmax(value_2, 1)
)
print(cor.test(egd_df$eyetr_value_1, egd_df$eyetr_value_2)$estimate)
cor
0.767
# View(egd_df)
egd_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
egd_temp <- egd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(egd_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")

egd_data = list(x=egd_temp, N=nrow(egd_temp))
fit_egd = stan(
file="stan_models/bivariate_correlation.stan",
data=egd_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_egd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egd, file = paste0("./bayesian_models/bayesian_models_correlation/cor_bivariate/eyetr_eyetr_gaze_duration_cor.rds"))
print("Go Past Time")
[1] "Go Past Time"
egpt_df = provo_eyetr_grouped_df %>% filter(metric == "go_past_time") %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
filter(text_id != 18) %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1),
eyetr_value_2 = pmax(value_2, 1)
)
print(cor.test(egpt_df$eyetr_value_1, egpt_df$eyetr_value_2)$estimate)
cor
0.5582
# View(egd_df)
egpt_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
egpt_temp <- egpt_df[c("eyetr_value_1", "eyetr_value_2")] %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(egpt_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")

egpt_data = list(x=egpt_temp, N=nrow(egpt_temp))
fit_egpt = stan(
file="stan_models/bivariate_correlation.stan",
data=egpt_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_egpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egpt, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_go_past_time_cor.rds"))
print("Total Duration")
[1] "Total Duration"
etd_df = provo_eyetr_grouped_df %>% filter(metric == "total_duration") %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
filter(text_id != 18) %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1),
eyetr_value_2 = pmax(value_2, 1)
)
print(cor.test(etd_df$eyetr_value_1, etd_df$eyetr_value_2)$estimate)
cor
0.8341
# View(egd_df)
etd_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
etd_temp <- etd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(etd_temp, pch = 16, col = "blue",
main = "Total Duration Not Log-Transformed")

etd_data = list(x=etd_temp, N=nrow(etd_temp))
fit_etd = stan(
file="stan_models/bivariate_correlation.stan",
data=etd_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Save the model
fit_etd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_etd, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_total_duration_cor.rds"))
print("Fisrt Pass Regression Prob.")
[1] "Fisrt Pass Regression Prob."
ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value)) %>%
# filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
filter(text_id != 18) %>%
spread(measure, value) %>%
# ==== for normal data drop0s ====
# filter(value_1 > 0, value_2 > 0) %>%
# mutate(eyetr_value_1 = pmax(value_1, 1e-5),
# eyetr_value_2 = pmax(value_2, 1e-5)
# )
# ==== for ranking the data drop0s ====
mutate(
eyetr_value_1 = ifelse(value_1 > 0, rank(value_1), NA),
eyetr_value_2 = ifelse(value_2 > 0, rank(value_2), NA)
) %>%
drop_na()
# ==== for ranking the data w/ 0s ====
# mutate(
# eyetr_value_1 = rank(value_1, ties.method = "average"),
# eyetr_value_2 = rank(value_2, ties.method = "average")
# )
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)
cor
0.5809
# View(egd_df)
ereg_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
drop_na() %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(ereg_temp, pch = 16, col = "blue",
main = "FPReg Not Log-Transformed")

# -------fit model FPReg ----------
# View(ereg_temp)
ereg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_ereg = stan(
file="stan_models/bivariate_correlation_rank.stan",
data=ereg_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_ereg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_ereg, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_rank_drop0s.rds"))
print("Skip Prob.")
[1] "Skip Prob."
eskip_df = provo_eyetr_grouped_df %>% filter(metric == "skip") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value)) %>%
# filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
filter(text_id != 18) %>%
spread(measure, value)
# ==== for normal data drop0s ====
# filter(value_1 > 0, value_2 > 0) %>%
# mutate(eyetr_value_1 = pmax(value_1, 1e-5),
# eyetr_value_2 = pmax(value_2, 1e-5)
# )
# ==== for ranking the data drop0s ====
# mutate(
# eyetr_value_1 = ifelse(value_1 > 0, rank(value_1), NA),
# eyetr_value_2 = ifelse(value_2 > 0, rank(value_2), NA)
# ) %>%
# drop_na()
# ==== for ranking the data w/ 0s ====
# mutate(
# eyetr_value_1 = rank(value_1, ties.method = "average"),
# eyetr_value_2 = rank(value_2, ties.method = "average")
# )
print(cor.test(eskip_df$value_1, eskip_df$value_2)$estimate)
cor
0.9073
eskip_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
eskip_temp <- eskip_df[c("value_1", "value_2")] %>%
drop_na() %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))

# Plot the first data matrix gd_temp
plot(eskip_temp, pch = 16, col = "blue",
main = "Skip Not Log-Transformed")

# -------fit model Skip ----------
# View(ereg_temp)
eskip_data = list(x=eskip_temp, N=nrow(eskip_temp))
fit_eskip = stan(
file="stan_models/bivariate_normal_reg.stan",
data=eskip_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_eskip@stanmodel@dso <- new("cxxdso")
saveRDS(fit_eskip, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_skip_cor_drop0s.rds"))
fit_egd = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_gaze_duration_cor_drop0s.rds")
fit_egpt = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_go_past_time_cor_drop0s.rds")
fit_etd = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_total_duration_cor_drop0s.rds")
fit_ereg = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_drop0s.rds")
fit_eskip = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_skip_cor_drop0s.rds")
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
print(fit_egd)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 234.21 0.01 1.00 232.27 233.54 234.19 234.87 236.20 4635 1
mu[2] 236.90 0.01 0.97 235.02 236.24 236.90 237.54 238.79 4910 1
sigma[1] 41.69 0.02 0.91 39.91 41.08 41.68 42.29 43.51 3619 1
sigma[2] 40.43 0.01 0.85 38.80 39.85 40.42 41.00 42.11 3582 1
nu 4.79 0.01 0.34 4.18 4.55 4.77 5.01 5.49 4490 1
rho 0.75 0.00 0.01 0.73 0.74 0.75 0.75 0.77 5064 1
cov[1,1] 1738.83 1.26 75.69 1593.04 1687.35 1737.16 1788.84 1892.83 3618 1
cov[1,2] 1261.40 1.07 60.61 1144.60 1219.78 1259.97 1301.33 1383.64 3231 1
cov[2,1] 1261.40 1.07 60.61 1144.60 1219.78 1259.97 1301.33 1383.64 3231 1
cov[2,2] 1635.31 1.15 68.73 1505.18 1588.13 1633.89 1680.99 1772.85 3581 1
x_rand[1] 235.04 0.60 53.55 126.81 204.21 234.45 265.21 343.46 8071 1
x_rand[2] 238.08 0.58 52.25 134.49 207.68 237.12 267.21 343.68 8199 1
attempt 0.00 0.00 0.06 0.00 0.00 0.00 0.00 0.00 7607 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -22639.73 0.03 1.75 -22644.12 -22640.68 -22639.40 -22638.44 -22637.35 3653 1
Samples were drawn using NUTS(diag_e) at Mon Feb 5 23:34:23 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
print(fit_egpt)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 303.70 0.03 2.02 299.63 302.37 303.70 305.04 307.61 6230 1
mu[2] 302.11 0.02 1.80 298.65 300.89 302.10 303.33 305.69 6262 1
sigma[1] 77.62 0.03 1.77 74.18 76.42 77.61 78.81 81.07 4594 1
sigma[2] 68.55 0.02 1.51 65.64 67.52 68.54 69.56 71.58 4950 1
nu 2.33 0.00 0.10 2.13 2.25 2.32 2.40 2.53 5695 1
rho 0.63 0.00 0.01 0.60 0.62 0.63 0.64 0.66 7305 1
cov[1,1] 6028.02 4.05 274.38 5502.89 5840.25 6022.79 6211.77 6572.65 4595 1
cov[1,2] 3372.29 2.65 178.45 3030.75 3248.64 3367.12 3492.40 3732.72 4544 1
cov[2,1] 3372.29 2.65 178.45 3030.75 3248.64 3367.12 3492.40 3732.72 4544 1
cov[2,2] 4701.89 2.95 207.63 4308.25 4559.35 4698.02 4838.38 5123.15 4963 1
x_rand[1] 316.81 1.63 144.61 99.38 247.09 305.30 366.85 597.63 7836 1
x_rand[2] 312.58 1.38 123.98 111.40 252.31 305.23 357.38 569.45 8117 1
attempt 0.03 0.00 0.18 0.00 0.00 0.00 0.00 1.00 7784 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -26967.65 0.03 1.74 -26971.89 -26968.58 -26967.31 -26966.37 -26965.23 3624 1
Samples were drawn using NUTS(diag_e) at Mon Feb 5 23:38:33 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
print(fit_etd)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 272.77 0.02 1.40 270.02 271.83 272.77 273.69 275.55 4475 1
mu[2] 267.14 0.02 1.28 264.64 266.26 267.12 268.00 269.68 4593 1
sigma[1] 60.01 0.02 1.28 57.55 59.15 60.00 60.86 62.53 3656 1
sigma[2] 54.94 0.02 1.12 52.79 54.18 54.92 55.70 57.19 3673 1
nu 5.27 0.01 0.38 4.58 5.01 5.25 5.51 6.05 4498 1
rho 0.81 0.00 0.01 0.79 0.80 0.81 0.81 0.82 4869 1
cov[1,1] 3603.09 2.54 153.48 3311.44 3498.40 3599.66 3703.53 3909.89 3659 1
cov[1,2] 2670.69 2.11 121.75 2440.79 2585.48 2667.90 2750.53 2920.02 3333 1
cov[2,1] 2670.69 2.11 121.75 2440.79 2585.48 2667.90 2750.53 2920.02 3333 1
cov[2,2] 3019.99 2.04 123.51 2787.14 2935.61 3016.10 3102.72 3271.26 3671 1
x_rand[1] 273.92 0.82 73.68 128.22 230.21 274.02 316.06 420.89 8061 1
x_rand[2] 269.13 0.75 67.33 133.79 228.98 268.90 307.33 404.88 8079 1
attempt 0.00 0.00 0.06 0.00 0.00 0.00 0.00 0.00 7660 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -24232.17 0.03 1.77 -24236.45 -24233.10 -24231.82 -24230.89 -24229.77 3293 1
Samples were drawn using NUTS(diag_e) at Mon Feb 5 23:41:09 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.--------------------------------------------"
print(fit_ereg)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.15 0.00 0.00 0.14 0.14 0.15 0.15 0.15 3942 1
mu[2] 0.14 0.00 0.00 0.14 0.14 0.14 0.15 0.15 3878 1
sigma[1] 0.09 0.00 0.00 0.08 0.09 0.09 0.09 0.09 3623 1
sigma[2] 0.09 0.00 0.00 0.08 0.08 0.09 0.09 0.09 3466 1
nu 3.39 0.00 0.21 3.02 3.25 3.38 3.53 3.82 4641 1
rho 0.66 0.00 0.02 0.63 0.65 0.66 0.67 0.69 5517 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 3627 1
cov[1,2] 0.01 0.00 0.00 0.00 0.00 0.01 0.01 0.01 3292 1
cov[2,1] 0.01 0.00 0.00 0.00 0.00 0.01 0.01 0.01 3292 1
cov[2,2] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 3471 1
x_rand[1] 0.18 0.00 0.11 0.02 0.11 0.16 0.23 0.43 7911 1
x_rand[2] 0.17 0.00 0.10 0.02 0.11 0.16 0.22 0.41 7898 1
attempt 0.16 0.00 0.43 0.00 0.00 0.00 0.00 1.00 8067 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 6053.37 0.03 1.73 6049.27 6052.43 6053.71 6054.65 6055.79 3589 1
Samples were drawn using NUTS(diag_e) at Tue Feb 6 22:53:58 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- Skip Prob.--------------------------------------------')
[1] "---------------------------- Skip Prob.--------------------------------------------"
print(fit_eskip)
Inference for Stan model: anon_model.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.42 0.00 0.00 0.41 0.42 0.42 0.42 0.43 4394 1
mu[2] 0.47 0.00 0.00 0.46 0.46 0.47 0.47 0.47 4472 1
sigma[1] 0.25 0.00 0.00 0.24 0.24 0.25 0.25 0.25 4291 1
sigma[2] 0.22 0.00 0.00 0.21 0.22 0.22 0.22 0.22 4205 1
nu 110.17 0.30 24.51 69.67 92.41 107.29 125.40 165.37 6568 1
rho 0.91 0.00 0.00 0.90 0.91 0.91 0.91 0.91 4932 1
cov[1,1] 0.06 0.00 0.00 0.06 0.06 0.06 0.06 0.06 4304 1
cov[1,2] 0.05 0.00 0.00 0.05 0.05 0.05 0.05 0.05 3900 1
cov[2,1] 0.05 0.00 0.00 0.05 0.05 0.05 0.05 0.05 3900 1
cov[2,2] 0.05 0.00 0.00 0.04 0.05 0.05 0.05 0.05 4205 1
x_rand[1] 0.45 0.00 0.22 0.06 0.29 0.44 0.61 0.90 7138 1
x_rand[2] 0.49 0.00 0.20 0.12 0.34 0.48 0.63 0.90 7290 1
attempt 0.05 0.00 0.22 0.00 0.00 0.00 0.00 1.00 7109 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 5351.87 0.03 1.69 5347.60 5350.96 5352.16 5353.13 5354.21 3356 1
Samples were drawn using NUTS(diag_e) at Sun Feb 25 01:56:18 2024.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# stan_trace(fit_egd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_egd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_egd, pars=c("rho", "mu", "sigma", "nu"))
# Gaze Duration
stan_trace(fit_egd)
stan_dens(fit_egd, separate_chains = TRUE)
stan_plot(fit_egd)
# Go Past Time
stan_trace(fit_egpt)
stan_dens(fit_egpt, separate_chains = TRUE)
stan_plot(fit_egpt)
# Total Duration
stan_trace(fit_etd)
stan_dens(fit_etd, separate_chains = TRUE)
stan_plot(fit_etd)
# FPReg
stan_trace(fit_ereg)
stan_dens(fit_ereg, separate_chains = TRUE)
stan_plot(fit_ereg)
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
rho_egd = as.numeric(extract(fit_egd, "rho")[[1]])
mean = mean(rho_egd)
crI = quantile(rho_egd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_egd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.7479
HPD: [0.7278, 0.7669]
crI: [0.7278, 0.767]
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
rho_egpt = as.numeric(extract(fit_egpt, "rho")[[1]])
mean = mean(rho_egpt)
crI = quantile(rho_egpt, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_egpt), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.6334
HPD: [0.6052, 0.6617]
crI: [0.6044, 0.6612]
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
rho_etd = as.numeric(extract(fit_etd, "rho")[[1]])
mean = mean(rho_etd)
crI = quantile(rho_etd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_etd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.8095
HPD: [0.7939, 0.825]
crI: [0.7937, 0.8248]
print('---------------------------- First Pass Regression --------------------------------------------')
[1] "---------------------------- First Pass Regression --------------------------------------------"
rho_ereg = as.numeric(extract(fit_ereg, "rho")[[1]])
mean = mean(rho_ereg)
crI = quantile(rho_ereg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.6638
HPD: [0.6339, 0.6922]
crI: [0.6337, 0.6921]
print('---------------------------- Skip --------------------------------------------')
[1] "---------------------------- Skip --------------------------------------------"
rho_eskip = as.numeric(extract(fit_eskip, "rho")[[1]])
mean = mean(rho_eskip)
crI = quantile(rho_eskip, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_eskip), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
Mean: 0.9081
HPD: [0.9012, 0.9148]
crI: [0.9011, 0.9148]
print('---------------------------- Gaze Duration--------------------------------------------')
egd_rand <- extract(fit_egd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(egd_rand[,1], egd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(egd_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(egd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(egd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- Go Past Time--------------------------------------------')
egpt_rand <- extract(fit_egpt, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Go Past Time") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(egpt_rand[,1], egpt_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(egpt_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(egpt_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(egpt_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- Total Duration--------------------------------------------')
etd_rand <- extract(fit_etd, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Total Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(etd_rand[,1], etd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(etd_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(etd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(etd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression --------------------------------------------')
ereg_rand <- extract(fit_ereg, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "First Pass Regression") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(ereg_rand[,1], ereg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(ereg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(ereg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
Bayesian – correlation between MoTR and word level statistics
print("Log Frequency")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$freq)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$freq)$estimate)
# View(stats_cor_df)
stats_cor_df %>%
gather(measure, value, c(7, 13)) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
mfreq_temp <- stats_cor_df[c("motr_value", "freq")] %>%
data.matrix()
efreq_temp <- stats_cor_df[c("eyetr_value", "freq")] %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(mfreq_temp, pch = 16, col = "blue",
main = "MoTR RTs and Word Frequency")
# Plot the first data matrix gd_temp
plot(efreq_temp, pch = 16, col = "blue",
main = "EyeTR RTs and Word Frequency")
motr & frequency
mfreq_data = list(x=mfreq_temp, N=nrow(mfreq_temp))
fit_mfreq = stan(
file="stan_models/stats_correlation.stan",
data=mfreq_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Save the model
fit_mfreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mfreq, file = paste0("./bayesian_models/bayesian_models_correlation/motr_freq_cor.rds"))
eyetr & frequency
efreq_data = list(x=efreq_temp, N=nrow(efreq_temp))
fit_efreq = stan(
file="stan_models/stats_correlation.stan",
data=efreq_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Save the model
fit_efreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_efreq, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_freq_cor.rds"))
print("Length")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$len)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$len)$estimate)
# View(stats_cor_df)
stats_cor_df %>%
gather(measure, value, c(9, 13)) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
mlen_temp <- stats_cor_df[c("motr_value", "len")] %>%
data.matrix()
elen_temp <- stats_cor_df[c("eyetr_value", "len")] %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(mlen_temp, pch = 16, col = "blue",
main = "MoTR RTs and Word Length")
# Plot the first data matrix gd_temp
plot(elen_temp, pch = 16, col = "blue",
main = "EyeTR RTs and Word Length")
motr & length
mlen_data = list(x=mlen_temp, N=nrow(mlen_temp))
fit_mlen = stan(
file="stan_models/stats_correlation_len_normal.stan",
data=mlen_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Save the model
fit_mlen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mlen, file = paste0("./bayesian_models/bayesian_models_correlation/motr_len_cor.rds"))
eyetr & length
elen_data = list(x=elen_temp, N=nrow(elen_temp))
fit_elen = stan(
file="stan_models/stats_correlation_len_normal.stan",
data=elen_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Save the model
fit_elen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_elen, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_len_cor.rds"))
print("Surprisal")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$surp)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$surp)$estimate)
# View(stats_cor_df)
stats_cor_df %>%
gather(measure, value, c(8, 13)) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
msurp_temp <- stats_cor_df[c("motr_value", "surp")] %>%
data.matrix()
esurp_temp <- stats_cor_df[c("eyetr_value", "surp")] %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(msurp_temp, pch = 16, col = "blue",
main = "MoTR RTs and Surprisal")
# Plot the first data matrix gd_temp
plot(esurp_temp, pch = 16, col = "blue",
main = "EyeTR RTs and Surprisal")
motr & surprisal
msurp_data = list(x=msurp_temp, N=nrow(msurp_temp))
fit_msurp = stan(
file="stan_models/stats_correlation.stan",
data=msurp_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Save the model
fit_msurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_msurp, file = paste0("./bayesian_models/bayesian_models_correlation/motr_surp_cor.rds"))
eyetr & surprisal
esurp_data = list(x=esurp_temp, N=nrow(esurp_temp))
fit_esurp = stan(
file="stan_models/stats_correlation.stan",
data=esurp_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Save the model
fit_esurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_esurp, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_surp_cor.rds"))
fit_mfreq = readRDS("./bayesian_models/bayesian_models_correlation/motr_freq_cor.rds")
fit_efreq = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_freq_cor.rds")
fit_mlen = readRDS("./bayesian_models/bayesian_models_correlation/motr_len_cor.rds")
fit_elen = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_len_cor.rds")
fit_msurp = readRDS("./bayesian_models/bayesian_models_correlation/motr_surp_cor.rds")
fit_esurp = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_surp_cor.rds")
print('---------------------------- MoTR & Log Frequency --------------------------------------------')
print(fit_mfreq)
print('---------------------------- EyeTR & Log Frequency --------------------------------------------')
print(fit_efreq)
print('---------------------------- MoTR & Length --------------------------------------------')
print(fit_mlen)
print('---------------------------- EyeTR & Length --------------------------------------------')
print(fit_elen)
print('---------------------------- MoTR & Surprisal --------------------------------------------')
print(fit_msurp)
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
print(fit_esurp)
# MoTR & Log Freq
stan_trace(fit_mfreq)
stan_dens(fit_mfreq, separate_chains = TRUE)
stan_plot(fit_mfreq)
# EyeTR & Log Freq
stan_trace(fit_efreq)
stan_dens(fit_efreq, separate_chains = TRUE)
stan_plot(fit_efreq)
# MoTR & Len
stan_trace(fit_mlen)
stan_dens(fit_mlen, separate_chains = TRUE)
stan_plot(fit_mlen)
# EyeTR & Len
stan_trace(fit_elen)
stan_dens(fit_elen, separate_chains = TRUE)
stan_plot(fit_elen)
# MoTR & Surprisal
stan_trace(fit_msurp)
stan_dens(fit_msurp, separate_chains = TRUE)
stan_plot(fit_msurp)
# EyeTR & Surprisal
stan_trace(fit_esurp)
stan_dens(fit_esurp, separate_chains = TRUE)
stan_plot(fit_esurp)
print('---------------------------- MoTR & Log Freq --------------------------------------------')
rho_mfreq = as.numeric(extract(fit_mfreq, "rho")[[1]])
mean = mean(rho_mfreq)
crI = quantile(rho_mfreq, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mfreq), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
print('---------------------------- EyeTR & Log Freq --------------------------------------------')
rho_efreq = as.numeric(extract(fit_efreq, "rho")[[1]])
mean = mean(rho_efreq)
crI = quantile(rho_efreq, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_efreq), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
print('---------------------------- MoTR & Length --------------------------------------------')
rho_mlen = as.numeric(extract(fit_mlen, "rho")[[1]])
mean = mean(rho_mlen)
crI = quantile(rho_mlen, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mlen), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
print('---------------------------- EyeTR & Length --------------------------------------------')
rho_elen = as.numeric(extract(fit_elen, "rho")[[1]])
mean = mean(rho_elen)
crI = quantile(rho_elen, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_elen), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
print('---------------------------- MoTR & Surprisal --------------------------------------------')
rho_msurp = as.numeric(extract(fit_msurp, "rho")[[1]])
mean = mean(rho_msurp)
crI = quantile(rho_msurp, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_msurp), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
rho_esurp = as.numeric(extract(fit_esurp, "rho")[[1]])
mean = mean(rho_esurp)
crI = quantile(rho_esurp, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_esurp), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
print('---------------------------- MoTR & Log Frequency--------------------------------------------')
mfreq_rand <- extract(fit_mfreq, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 12), type="n",
xlab = "MoTR value", ylab = "Log Frequency", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mfreq_rand[,1], mfreq_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(mfreq_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mfreq_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mfreq_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- EyeTR & Log Frequency--------------------------------------------')
efreq_rand <- extract(fit_efreq, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 500), ylim=c(0, 12), type="n",
xlab = "Eye tracking value", ylab = "Log Frequency", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(efreq_rand[,1], efreq_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(efreq_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(efreq_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(efreq_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- MoTR & Length --------------------------------------------')
mlen_rand <- extract(fit_mlen, "x_rand")[[1]]
# mlen_rand
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
xlab = "MoTR value", ylab = "Word Length", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(mlen_rand[,1], mlen_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(mlen_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(mlen_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlen_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- EyeTR & Length --------------------------------------------')
elen_rand <- extract(fit_elen, "x_rand")[[1]]
# elen_rand
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
xlab = "EyeTR value", ylab = "Word Length", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(elen_rand[,1], elen_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(elen_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(elen_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(elen_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- MoTR & Surprisal --------------------------------------------')
msurp_rand <- extract(fit_msurp, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
xlab = "MoTR value", ylab = "Word Surprisal", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(msurp_rand [,1], msurp_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(msurp_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(msurp_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(msurp_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
esurp_rand <- extract(fit_esurp, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
xlab = "EyeTR value", ylab = "Word Surprisal", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(esurp_rand [,1], esurp_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(esurp_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(esurp_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(esurp_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print("EyeTR vs. EyeTR Fisrt Pass Regression Prob. < 0.3 ")
ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value)) %>%
filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1e-5),
eyetr_value_2 = pmax(value_2, 1e-5))
ereg_df_low = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value)) %>%
filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1e-5),
eyetr_value_2 = pmax(value_2, 1e-5)) %>%
filter(eyetr_value_1 < 0.3)
# View(ereg_df_low)
ereg_df_high = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value)) %>%
filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1e-5),
eyetr_value_2 = pmax(value_2, 1e-5)) %>%
filter(eyetr_value_1 >= 0.3)
# View(ereg_df_high)
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$p.value)
print(cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)$estimate)
print(cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)$p.value)
print(cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)$estimate)
print(cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)$p.value)
# View(egd_df)
ereg_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
drop_na() %>%
data.matrix()
ereg_temp_low <- ereg_df_low[c("eyetr_value_1", "eyetr_value_2")] %>%
drop_na() %>%
data.matrix()
ereg_temp_high <- ereg_df_high[c("eyetr_value_1", "eyetr_value_2")] %>%
drop_na() %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 3))
# Plot the first data matrix gd_temp
plot(ereg_temp, pch = 16, col = "blue",
main = "FPReg all data Not Log-Transformed")
plot(ereg_temp_low, pch = 16, col = "blue",
main = "FPReg < 0.3 Not Log-Transformed")
plot(ereg_temp_high, pch = 16, col = "blue",
main = "FPReg > 0.3 Not Log-Transformed")
# -------fit model eyetr vs. eyetr FPReg <0.3 & >=0.3 ----------
reg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_reg = stan(
# file="stan_models/bivariate_beta_correlation_reg.stan",
file = "stan_models/bivariate_normal_reg.stan",
data=reg_data,
iter=4000,
chains=4,
cores=4,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_all_data.rds"))
exploratory: divide eye tracking regression data into two
parts.
# fit_ereg_all = readRDS("./eyetr_eyetr_FPReg_cor_all_data.rds")
fit_ereg_all = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor.rds")
fit_ereg_low = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_00-03.rds")
fit_ereg_high = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_03-1.rds")
print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
print(fit_ereg_all)
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
print(fit_ereg_low)
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
print(fit_ereg_high)
# # FPReg all data
stan_trace(fit_ereg_all)
stan_dens(fit_ereg_all, separate_chains = TRUE)
stan_plot(fit_ereg_all)
# # FPReg < 0.3
stan_trace(fit_ereg_low)
stan_dens(fit_ereg_low, separate_chains = TRUE)
stan_plot(fit_ereg_low)
# FPReg >= 0.3
stan_trace(fit_ereg_high)
stan_dens(fit_ereg_high, separate_chains = TRUE)
stan_plot(fit_ereg_high)
print('---------------------------- First Pass Regression all data--------------------------------------------')
rho_ereg_all = as.numeric(extract(fit_ereg_all, "rho")[[1]])
mean = mean(rho_ereg_all)
crI = quantile(rho_ereg_all, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_all), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
rho_ereg_low = as.numeric(extract(fit_ereg_low, "rho")[[1]])
mean = mean(rho_ereg_low)
crI = quantile(rho_ereg_low, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_low), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
rho_ereg_high = as.numeric(extract(fit_ereg_high, "rho")[[1]])
mean = mean(rho_ereg_high)
crI = quantile(rho_ereg_high, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_high), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
print('---------------------------- First Pass Regression all data --------------------------------------------')
eallreg_rand <- extract(fit_ereg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(eallreg_rand[,1], eallreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp, pch=16, col="red")
# add dataEllipse with color
dataEllipse(eallreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(eallreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
elowreg_rand <- extract(fit_ereg_low, "x_rand")[[1]]
# print(elowreg_rand)
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(elowreg_rand[,1], elowreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp_low, pch=16, col="red")
# add dataEllipse with color
dataEllipse(elowreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(elowreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
ehighreg_rand_samples <- extract(fit_ereg_high, "x_rand")[[1]]
# print(mhighreg_rand_samples)
selected_indices <- sample(1:nrow(ehighreg_rand_samples), 900)
ehighreg_rand <- ehighreg_rand_samples[selected_indices, ]
# mhighreg_rand <- extract(fit_mreg_high, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand with color
points(ehighreg_rand[,1], ehighreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp_high, pch=16, col="red")
# add dataEllipse with color
dataEllipse(ehighreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(ehighreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
---
title: "Correlation Analysis for MoTR on Provo Data"
output: html_notebook
---

```{r}
shhh <- suppressPackageStartupMessages # It's a library, so shhh!

shhh(library( mgcv ))
shhh(library(dplyr))
shhh(library(ggplot2))
shhh(library(lme4))
shhh(library(tidymv))
shhh(library(gamlss))
shhh(library(gsubfn))
shhh(library(lmerTest))
shhh(library(tidyverse))
shhh(library(boot))
shhh(library(rsample))
shhh(library(plotrix))
shhh(library(ggrepel))
shhh(library(mgcv))

shhh(library(brms))
shhh(library(bayesplot))
shhh(library(patchwork))
shhh(library(MASS))
shhh(library(tidyr))
shhh(library(extraDistr))
shhh(library(purrr))
# For exercises with Stan code
shhh(library(rstan))
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = FALSE)

shhh(library(car))
shhh(library(coda))
shhh(library(gridExtra))

theme_set(theme_bw())
options(digits=4)
options(scipen=999)
set.seed(444)
pipe_message = function(.data, status) {message(status); .data}

```


# Read in MoTR Data

```{r}

rate = 160

file_prefix = "../data/provo_f160/"
fnames = list.files(path=file_prefix)

df = data.frame()
for (f in fnames) {
  temp = read.csv(paste0(file_prefix, "/", f)) %>%
    mutate(subj = str_remove(f, "_reading_measures.csv"))
  df = rbind(df, temp)
}

# Filter out readers whose accuracy to the comprehension questions were less than 80%.
filter_df = df %>%
  group_by(para_nr, subj) %>% summarise(correct = if_else(unique(correctness) == 1, 1, 0)) %>% ungroup() %>%
  drop_na() %>%
  group_by(subj) %>% summarise(p_correct = mean(correct)) %>% ungroup() %>%
  mutate(p_correct = round(p_correct, digits = 2))

filter_df = filter_df %>% filter(p_correct < 0.8)
filter_list = filter_df$subj
pilot_exceptions <- c("reader_255", "reader_256", "reader_259", "reader_261", "reader_262", "reader_263")

raw_df = df %>%
  filter(! subj %in% c(filter_list) | (subj %in% pilot_exceptions)) %>%
  mutate(word = str_trim(word)) %>%
  mutate(subj = str_remove(subj, "reader_")) %>%
  mutate(subj = as.character(subj)) %>%
  mutate(FPReg = if_else(total_duration == 0, -1, FPReg)) %>% #If the word is skipped we can't say that it wasn't regressed on the first pass. Set to a "NA"
  mutate(skip = if_else(FPFix == 1, 0, 1)) %>% # use the same defination as in provo paper
  dplyr::select(subj, expr_id, cond_id, para_nr, word, word_nr, first_duration, total_duration, gaze_duration, go_past_time, FPReg, skip) %>%
  gather(metric, value, 7:10) %>%
  group_by(para_nr, subj, metric, cond_id, expr_id) %>%
  mutate(fixed = if_else(value > 0, 1, 0),
         n_fixed = sum(fixed),
         n_words = n()) %>%
    ungroup() %>%
  mutate(fix_threshold = n_fixed > (n_words / 5)) %>%
  mutate(skimming = if_else(fix_threshold == F,T, F)) %>%
  filter(skimming == F) %>%
  spread(metric, value) %>%
  dplyr::select(-fixed, -n_fixed, -n_words, -fix_threshold, -skimming)
length(unique(raw_df$subj))
# View(raw_df)

df %>%
  filter(! subj %in% c(filter_list) | (subj %in% pilot_exceptions)) %>%
  filter(FPReg >= 0) %>%
  dplyr::select(FPReg) %>%
  drop_na() %>%
  summarise( m = mean(FPReg))

df %>%
  filter(! subj %in% c(filter_list) | (subj %in% pilot_exceptions)) %>%
  dplyr::select(FPFix) %>%
  drop_na() %>%
  summarise( m = mean(FPFix))


```

```{r}
View(raw_df)
```


```{r}
# Average across subjects
motr_agg_df = raw_df %>%
  gather(metric, value, 7:12) %>%
    filter(value >= 0) %>% #Removes the "NA" values for FPReg
  
    # ==== Remove skipped words
    mutate(zero = if_else(!metric %in% c("FPReg", "skip") & value == 0,T, F)) %>%
    filter(zero == F) %>%
  
    drop_na() %>%
    group_by(para_nr, word_nr, word, metric) %>%
    
    # === Remove outliers > 3SD
      # mutate(outlier = if_else(metric != "FPReg" & value > (mean(value) + 3 * sd(value)), T, F)) %>% filter(outlier == F) %>%
  
      summarise(value = mean(value), nsubj = length(unique(subj))) %>%
  ungroup() %>%
  arrange(para_nr, word_nr) %>%
  rename(text_id = para_nr, word_text_idx = word_nr, motr_value = value)
# View(motr_agg_df)

```




# Comparison to Provo


```{r}
# Read in Provo surprisal, frequency and length data
provo_modeling_df = read.csv("../data/provo_stats.csv") %>%
  dplyr::select(text_id, sent_id, trigger_idx, word, freq, surp, len) %>%
  rename(word_idx = trigger_idx)

provo_modeling_df

```

```{r}
# Read in Provo eyetracking data

provo_raw_df = read.csv("../data/provo_eyetracking.csv")

```

```{r}

# unique(provo_raw_df$Participant_ID)
# length(unique(provo_raw_df$Participant_ID))

provo_eyetracking_df = provo_raw_df %>%
  dplyr::select(Participant_ID, Text_ID, Sentence_Number, Word_In_Sentence_Number, Word, Word_Number, IA_FIRST_FIX_PROGRESSIVE, IA_FIRST_RUN_DWELL_TIME, IA_DWELL_TIME, IA_REGRESSION_PATH_DURATION, IA_REGRESSION_OUT, IA_SKIP) %>%
  rename( #first_duration = IA_FIRST_FIXATION_DURATION,   
          gaze_duration = IA_FIRST_RUN_DWELL_TIME,
          total_duration = IA_DWELL_TIME,
          go_past_time = IA_REGRESSION_PATH_DURATION,
          subj = Participant_ID,
          text_id = Text_ID,
          sent_id = Sentence_Number,
          word_idx = Word_In_Sentence_Number,
          word_text_idx = Word_Number,   # IA_ID?
          word = Word,      # Word?
          FPReg = IA_REGRESSION_OUT,
          skip = IA_SKIP,
          ff_progressive = IA_FIRST_FIX_PROGRESSIVE) %>%
  mutate(first_duration = gaze_duration) %>%
  mutate(gaze_duration = if_else(ff_progressive == 0, 0, as.double(gaze_duration)),
         go_past_time = if_else(ff_progressive == 0, 0, as.double(go_past_time))) %>%
  dplyr::select(-ff_progressive) %>%
  
  mutate(
    gaze_duration = if_else(total_duration == 0, 0, as.double(gaze_duration)),
      go_past_time = if_else(total_duration == 0, 0, as.double(go_past_time)),
      FPReg = if_else(total_duration == 0, -1, as.double(FPReg)),
      first_duration =  if_else(total_duration == 0, 0, as.double(first_duration)),
  ) %>%
  gather(metric, value, 7:12) %>%
  filter(value >= 0) %>%          # filter skipped word in eye tracking data for FPReg
  
  # ==== Remove skipped words
  mutate(zero = if_else(!metric %in% c("FPReg", "skip") & value == 0,T, F)) %>%
  filter(zero == F) %>%
  
  # mutate(value = if_else(is.na(value), as.integer(0), as.integer(value))) %>%
  # mutate(value = if_else(metric != "FPReg" & is.na(value), as.integer(0), as.integer(value))) %>%
  drop_na() %>%
  mutate(word = str_trim(word)) %>%
  mutate(subj = str_remove(subj, "Sub")) %>%
  mutate(subj = as.integer(subj)) %>%
    group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
  
  # === Remove outliers > 3SD
    # mutate(outlier = if_else(! metric %in% c("FPReg", "skip") & value > (mean(value) + 3 * sd(value) ), T, F)) %>%
    # filter(outlier == F) %>%
  
  ungroup() #%>%

# Aggregate cross-participant data for all subjects
provo_eyetracking_agg_df = provo_eyetracking_df %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    summarise(value = mean(value),
              nsubj = length(unique(subj))) %>%
    ungroup()

provo_raw_df %>%
  dplyr::select(IA_REGRESSION_OUT) %>%
  drop_na() %>%
  summarise( m = mean(IA_REGRESSION_OUT))

provo_raw_df %>%
  dplyr::select(IA_SKIP) %>%
  drop_na() %>%
  summarise( m = mean(IA_SKIP))

```

```{r}

# Split the eyetracking data in two by subjects to see how well it correlates with itself
provo_eyetracking_subj1_df_temp = provo_eyetracking_df %>%
  filter(subj <= 42) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    summarise(value = mean(value)) %>%
  ungroup() %>%
  rename(value_1 = value) #%>%
  # dplyr::select(-sent_id, -word_idx)


provo_eyetracking_subj1_df = merge(provo_eyetracking_subj1_df_temp, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
  arrange(text_id, sent_id, word_idx) %>%
  filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
  filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
  rename(word = word.y) %>%
  dplyr::select(text_id, word_text_idx, metric, word, value_1) 

# View(provo_eyetracking_subj1_df)

provo_eyetracking_subj2_df = provo_eyetracking_df %>%
  filter(subj > 42) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
    summarise(value = mean(value)) %>%
  ungroup() %>%
    rename(value_2 = value)%>%
  dplyr::select(-sent_id, -word_idx)

# View(provo_eyetracking_subj2_df)
  
provo_eyetr_grouped_df = merge(provo_eyetracking_subj2_df, provo_eyetracking_subj1_df, by=c("text_id", "word_text_idx", "metric")) %>%
  # filter(word.x == word.y) %>%
  dplyr::select(-word.y) %>%
  # === Remove outliers > 3SD
  # group_by(metric) %>%
  #   mutate(motr_outlier = if_else(! metric %in% c("FPReg", "skip") & value_1 > (mean(value_1) + 3 * sd(value_1) ), T, F)) %>%
  #   filter(motr_outlier == F) %>%
  #   mutate(eyetr_outlier = if_else(! metric %in% c("FPReg", "skip") & value_2 > (mean(value_2) + 3 * sd(value_2) ), T, F)) %>%
  #   filter(eyetr_outlier == F) %>%
  # ungroup() %>%
  
  gather(measure, value, c("value_1", "value_2")) #%>%
  # dplyr::select(-motr_outlier, -eyetr_outlier)

# View(provo_eyetr_grouped_df)

```


```{r}
provo_df = merge(provo_eyetracking_agg_df, provo_modeling_df, by=c("text_id", "sent_id", "word_idx")) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  arrange(text_id, sent_id, word_idx) %>%
  rename(eyetr_value = value) 

provo_df = merge(provo_df, motr_agg_df, by=c("text_id", "word_text_idx", "metric")) %>%
arrange(text_id, sent_id, word_idx) %>%
  # almost all the word.x != word.y is because of normalization problem, so we can keep them, instead, deleting some special cases
filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
  filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
# filter(word.x == word) #%>%
dplyr::select(-word.x, -word.y) %>%
  
# === Remove outliers > 3SD
# group_by(metric) %>%
#   mutate(motr_outlier = if_else(! metric %in% c("FPReg", "skip") & motr_value > (mean(motr_value) + 3 * sd(motr_value) ), T, F)) %>%
#   filter(motr_outlier == F) %>%
#   mutate(eyetr_outlier = if_else(! metric %in% c("FPReg", "skip") & eyetr_value > (mean(eyetr_value) + 3 * sd(eyetr_value) ), T, F)) %>%
#   filter(eyetr_outlier == F) %>%
# ungroup() %>%
  
gather(measure, value, c("eyetr_value", "motr_value")) #%>%
# dplyr::select(-motr_outlier, -eyetr_outlier)
  
# provo_df
```


# Bayesian -- use Stan -- motr & eyetr correlation
```{r}
print("Gaze Duration")
gd_df = provo_df %>% filter(metric == "gaze_duration") %>% 
  spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value =  pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)
  ) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))
print(cor.test(gd_df$eyetr_value, gd_df$motr_value)$estimate)
print(cor.test(gd_df$eyetr_value_log, gd_df$motr_value_log)$estimate)
# View(gd_df)
```


```{r}
gd_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")
  
```



```{r}
# center data around 0.

gd_temp <- gd_df[c("eyetr_value", "motr_value")] %>%
   # mutate(eyetr_value = eyetr_value - mean(eyetr_value),
   #      motr_value = motr_value - mean(motr_value)) %>%
  data.matrix()

gd_temp_log <- gd_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(gd_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Plot the second data matrix gd_temp_log
plot(gd_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")

```



```{r, eval=FALSE}
gd_data = list(x=gd_temp, N=nrow(gd_temp))

fit_gd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=gd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_gd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gd, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds"))

```

```{r}
print("Go Past Time")
gpt_df = provo_df %>% filter(metric == "go_past_time") %>% 
  spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value =  pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)
  ) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))
print(cor.test(gpt_df$eyetr_value, gpt_df$motr_value)$estimate)
print(cor.test(gpt_df$eyetr_value_log, gpt_df$motr_value_log)$estimate)

gpt_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

gpt_temp <- gpt_df[c("eyetr_value", "motr_value")] %>% data.matrix()

gpt_temp_log <- gpt_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gpt_temp
plot(gpt_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Plot the second data matrix gpt_temp_log
plot(gpt_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model go past time ----------
gpt_data = list(x=gpt_temp, N=nrow(gpt_temp))
fit_gpt = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=gpt_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_gpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gpt, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds"))
```


```{r}
print("Total Duration")
td_df = provo_df %>% filter(metric == "total_duration") %>% 
  spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value =  pmax(eyetr_value, 1),
         motr_value = pmax(motr_value, 1)
  ) %>%
  mutate(eyetr_value_log = log(eyetr_value),
         motr_value_log = log(motr_value))
print(cor.test(td_df$eyetr_value, td_df$motr_value)$estimate)
print(cor.test(td_df$eyetr_value_log, td_df$motr_value_log)$estimate)

td_df %>% 
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

td_temp <- td_df[c("eyetr_value", "motr_value")] %>% data.matrix()

td_temp_log <- td_df[c("eyetr_value_log", "motr_value_log")] %>%
 mutate(eyetr_value_log = eyetr_value_log - mean(eyetr_value_log), 
        motr_value_log = motr_value_log - mean(motr_value_log)) %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix td_temp
plot(td_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
# Plot the second data matrix td_temp_log
plot(td_temp_log, pch = 16, col = "red",
     main = "Centered Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model total duration ----------
td_data = list(x=td_temp, N=nrow(td_temp))
fit_td = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=td_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_td@stanmodel@dso <- new("cxxdso")
saveRDS(fit_td, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds"))
```

```{r}
print("First Pass Regression Prob.")
reg_df = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5))
print(cor.test(reg_df$eyetr_value, reg_df$motr_value)$estimate)

# View(reg_df)
reg_df %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp <- reg_df[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix td_temp
plot(reg_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model FPReg ----------
reg_data = list(x=reg_temp, N=nrow(reg_temp))
fit_reg = stan(
  file="stan_models/bivariate_normal_reg.stan", 
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_drop0s.rds"))
```


# rank transformation for FPReg
```{r, eval=FALSE}
reg_df = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  mutate(
    eyetr_value_rank = ifelse(eyetr_value > 0, rank(eyetr_value), NA),
    motr_value_rank = ifelse(motr_value > 0, rank(motr_value), NA)
  ) %>%
  drop_na()
#   mutate(
#   eyetr_value_rank = rank(eyetr_value, ties.method = "average"),
#   motr_value_rank = rank(motr_value, ties.method = "average")
# )
# View(reg_df)

print(cor.test(reg_df$eyetr_value_rank, reg_df$motr_value_rank)$estimate)
print(cor.test(reg_df$eyetr_value_rank, reg_df$motr_value_rank, method = "kendall"))


reg_df %>% 
  gather(measure, value, 14:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp <- reg_df[c("eyetr_value_rank", "motr_value_rank")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix td_temp
plot(reg_temp, pch = 16, col = "blue",
     main = "Rank Transformed")
  
```

```{r, eval=FALSE}
# -------fit model FPReg RANK----------
reg_data = list(x=reg_temp, N=nrow(reg_temp))
fit_reg = stan(
  file="stan_models/bivariate_correlation_rank.stan", 
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_rank_drop0s.rds"))
```

```{r}
print("skip Prob.")
skip_df = provo_df %>% filter(metric == "skip") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5))
print(cor.test(skip_df$eyetr_value, skip_df$motr_value)$estimate)

# View(reg_df)
skip_df %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

skip_temp <- skip_df[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix td_temp
plot(skip_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model skip ----------
skip_data = list(x=skip_temp, N=nrow(skip_temp))
fit_skip = stan(
  file="stan_models/bivariate_normal_reg.stan", 
  data=skip_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_skip@stanmodel@dso <- new("cxxdso")
saveRDS(fit_skip, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_skip_cor_drop0s.rds"))
```

```{r}
fit_gd = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds")
fit_gpt = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds")
fit_td = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds")
fit_reg = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_drop0s.rds")
fit_reg_rank = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_rank_drop0s.rds")
fit_skip = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_skip_cor_drop0s.rds")

# models for drop 0s
# fit_gd = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_gaze_duration_cor_drop0s.rds")
# fit_gpt = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_go_past_time_cor_drop0s.rds")
# fit_td = readRDS("./bayesian_models/bayesian_models_correlation/motr_eyetr_total_duration_cor_drop0s.rds")
# fit_reg = readRDS("./bayesian_models/bayesian_models_correlation/ranked_motr_eyetr_FPReg_cor.rds")

print('---------------------------- Gaze Duration--------------------------------------------')
print(fit_gd)
print('---------------------------- Go Past Time--------------------------------------------')
print(fit_gpt)
print('---------------------------- Total Duration--------------------------------------------')
print(fit_td)
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
print(fit_reg)
print('---------------------------- First Pass Regression Prob. RANK--------------------------------------------')
print(fit_reg_rank)
print('---------------------------- Skip Prob.--------------------------------------------')
print(fit_skip)

```

```{r, eval=FALSE}
# stan_trace(fit_gd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_gd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_gd, pars=c("rho", "mu", "sigma", "nu"))

# Gaze Duration
stan_trace(fit_gd)
stan_dens(fit_gd, separate_chains = TRUE)
stan_plot(fit_gd)

# Go Past Time
stan_trace(fit_gpt)
stan_dens(fit_gpt, separate_chains = TRUE)
stan_plot(fit_gpt)

# Total Duration
stan_trace(fit_td)
stan_dens(fit_td, separate_chains = TRUE)
stan_plot(fit_td)

# FPReg
stan_trace(fit_reg)
stan_dens(fit_reg, separate_chains = TRUE)
stan_plot(fit_reg)

```

```{r, fig.width=5, fig.height=10, eval=FALSE}
p1 <- stan_trace(fit_gd, pars = 'rho', inc_warmup = FALSE)
p2 <- stan_dens(fit_gd, pars = 'rho', separate_chains = TRUE)
p3 <- stan_trace(fit_gd, pars = 'mu[1]', inc_warmup = FALSE)
p4 <- stan_dens(fit_gd, pars = 'mu[1]', separate_chains = TRUE)
p5 <- stan_trace(fit_gd, pars = 'mu[2]', inc_warmup = FALSE)
p6 <- stan_dens(fit_gd, pars = 'mu[2]', separate_chains = TRUE)
p7 <- stan_trace(fit_gd, pars = 'sigma[1]', inc_warmup = FALSE)
p8 <- stan_dens(fit_gd, pars = 'sigma[1]', separate_chains = TRUE)
p9 <- stan_trace(fit_gd, pars = 'sigma[2]', inc_warmup = FALSE)
p10 <- stan_dens(fit_gd, pars = 'sigma[2]', separate_chains = TRUE)
p11 <- stan_trace(fit_gd, pars = 'nu', inc_warmup = FALSE)
p12 <- stan_dens(fit_gd, pars = 'nu', separate_chains = TRUE)


# Use grid.arrange() to arrange the plots
# grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, ncol=2, nrow=6)

```



```{r}
print('---------------------------- Gaze Duration--------------------------------------------')
rho_gd = as.numeric(extract(fit_gd, "rho")[[1]])
mean = mean(rho_gd)
crI = quantile(rho_gd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_gd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Go Past Time--------------------------------------------')
rho_gpt = as.numeric(extract(fit_gpt, "rho")[[1]])
mean = mean(rho_gpt)
crI = quantile(rho_gpt, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_gpt), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Total Duration--------------------------------------------')
rho_td = as.numeric(extract(fit_td, "rho")[[1]])
mean = mean(rho_td)
crI = quantile(rho_td, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_td), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression --------------------------------------------')
# rho_reg = as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
rho_reg = as.numeric(extract(fit_reg, "rho")[[1]])
mean = mean(rho_reg)
crI = quantile(rho_reg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_reg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression RANK--------------------------------------------')
# rho_reg = as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
rho_reg = as.numeric(extract(fit_reg_rank, "rho")[[1]])
mean = mean(rho_reg)
crI = quantile(rho_reg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_reg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")

print('---------------------------- Skip --------------------------------------------')
# rho_reg = as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
rho_skip = as.numeric(extract(fit_skip, "rho")[[1]])
mean = mean(rho_skip)
crI = quantile(rho_skip, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_skip), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
```


```{r, eval=FALSE}
print('---------------------------- Gaze Duration--------------------------------------------')
gd_rand <- extract(fit_gd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(gd_rand[,1], gd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(gd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(gd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(gd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Go Past Time--------------------------------------------')
gpt_rand <- extract(fit_gpt, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Go Past Time") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(gpt_rand[,1], gpt_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(gpt_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(gpt_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(gpt_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Total Duration--------------------------------------------')
td_rand <- extract(fit_td, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Total Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(td_rand[,1], td_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(td_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(td_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(td_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression --------------------------------------------')
reg_rand <- extract(fit_reg, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(reg_rand[,1], reg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(reg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(reg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Skip --------------------------------------------')
skip_rand <- extract(fit_skip, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Skip") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(skip_rand[,1], skip_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(skip_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(skip_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(skip_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

```

# model motr eyetr FPReg correlation (eyetr < 0.3)
```{r}
print("First Pass Regression Prob. all and  < 0.3")
reg_df_all = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5))

reg_df_low_drop0 = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value < 0.3)

reg_df_low = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value < 0.3)
  # mutate(eyetr_value = exp(eyetr_value),
         # motr_value = exp(motr_value)
         # )
# View(reg_df)

print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$estimate)
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$p.value)
print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$estimate)
print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$p.value)
print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$estimate)
print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$p.value)

# View(reg_df)
reg_df_low %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp_all <- reg_df_all[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low <- reg_df_low[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low_drop0 <- reg_df_low_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix td_temp
plot(reg_temp_low, pch = 16, col = "blue",
     main = "Not Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model FPReg < 0.3 ----------
reg_data = list(x=reg_temp_all, N=nrow(reg_temp_all))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_all_data_drop0s.rds"))
```


# model motr eyetr FPReg correlation (eyetr >= 0.3)
```{r}
print("First Pass Regression Prob. >= 0.3")
reg_df_high_drop0 = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value >= 0.3)

reg_df_high = provo_df %>% filter(metric == "FPReg") %>% 
  spread(measure, value) %>%
  # filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value =  pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value >= 0.3)
  # mutate(eyetr_value = exp(eyetr_value),
         # motr_value = exp(motr_value)
         # )
# View(reg_df)

print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$estimate)
print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$p.value)
print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$estimate)
print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$p.value)

# View(reg_df)
reg_df_high %>% 
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

reg_temp_high <- reg_df_high[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_high_drop0 <- reg_df_high_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 3))
# Plot the first data matrix td_temp
plot(reg_temp_all, pch = 16, col = "blue",
     main = "FPReg not logged all data")
plot(reg_temp_low, pch = 16, col = "blue",
     main = "FPReg not logged eyetr < 0.3 ")
plot(reg_temp_high, pch = 16, col = "blue",
     main = "FPReg not logged eyetr >= 0.3")
```

```{r, eval=FALSE}
# -------fit model FPReg >= 0.3 ----------
reg_data = list(x=reg_temp_high_drop0, N=nrow(reg_temp_high_drop0))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0(".//bayesian_models/bayesian_models_correlation/motr_eyetr_FPReg_cor_03-1_drop0s.rds"))
```



```{r}
fit_mreg_all = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_all_data.rds")
fit_mreg_all_drop0 = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_all_data_drop0s.rds")
fit_mreg_low = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_00-03.rds")
fit_mreg_low_drop0 = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_00-03_drop0s.rds")
fit_mreg_high = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_03-1.rds")
fit_mreg_high_drop0 = readRDS("./bayesian_models/bayesian_models_correlation_2023/motr_eyetr_FPReg_cor_03-1_drop0s.rds")

print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
print(fit_mreg_all)
print('---------------------------- First Pass Regression Prob. all data no 0s--------------------------------------------')
print(fit_mreg_all_drop0)
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
print(fit_mreg_low)
print('---------------------------- First Pass Regression Prob.< 0.3 no 0s--------------------------------------------')
print(fit_mreg_low_drop0)
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
print(fit_mreg_high)
print('---------------------------- First Pass Regression Prob.>= 0.3 no 0s--------------------------------------------')
print(fit_mreg_high_drop0)
```

```{r}
# # FPReg all data
# stan_trace(fit_mreg_all)
# stan_dens(fit_mreg_all, separate_chains = TRUE)
# stan_plot(fit_mreg_all)

# stan_trace(fit_mreg_all_drop0)
# stan_dens(fit_mreg_all_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_all_drop0)

# # FPReg < 0.3
# stan_trace(fit_mreg_low)
# stan_dens(fit_mreg_low, separate_chains = TRUE)
# stan_plot(fit_mreg_low)
# 
# stan_trace(fit_mreg_low_drop0)
# stan_dens(fit_mreg_low_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_low_drop0)

# FPReg > 0.3
stan_trace(fit_mreg_high)
stan_dens(fit_mreg_high, separate_chains = TRUE)
stan_plot(fit_mreg_high)

stan_trace(fit_mreg_high_drop0)
stan_dens(fit_mreg_high_drop0, separate_chains = TRUE)
stan_plot(fit_mreg_high_drop0)
```

```{r}
print('---------------------------- First Pass Regression all data--------------------------------------------')
rho_mreg_all = as.numeric(extract(fit_mreg_all, "rho")[[1]])
mean = mean(rho_mreg_all)
crI = quantile(rho_mreg_all, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_all), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
rho_mreg_all_drop0 = as.numeric(extract(fit_mreg_all_drop0, "rho")[[1]])
mean = mean(rho_mreg_all_drop0)
crI = quantile(rho_mreg_all_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_all_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
rho_mreg_low = as.numeric(extract(fit_mreg_low, "rho")[[1]])
mean = mean(rho_mreg_low)
crI = quantile(rho_mreg_low, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_low), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression < 0.3 no 0s--------------------------------------------')
rho_mreg_low_drop0 = as.numeric(extract(fit_mreg_low_drop0, "rho")[[1]])
mean = mean(rho_mreg_low_drop0)
crI = quantile(rho_mreg_low_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_low_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
rho_mreg_high = as.numeric(extract(fit_mreg_high, "rho")[[1]])
mean = mean(rho_mreg_high)
crI = quantile(rho_mreg_high, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_high), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression >= 0.3 no 0s--------------------------------------------')
rho_mreg_high_drop0 = as.numeric(extract(fit_mreg_high_drop0, "rho")[[1]])
mean = mean(rho_mreg_high_drop0)
crI = quantile(rho_mreg_high_drop0, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mreg_high_drop0), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
```

```{r, eval=FALSE}
print('---------------------------- First Pass Regression all data --------------------------------------------')
mallreg_rand <- extract(fit_mreg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mallreg_rand[,1], mallreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_all, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mallreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
mallreg_rand_drop0 <- extract(fit_mreg_all_drop0, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mallreg_rand_drop0[,1], mallreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_all, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mallreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
mlowreg_rand <- extract(fit_mreg_low, "x_rand")[[1]]
print(mlowreg_rand)
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mlowreg_rand[,1], mlowreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_low, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mlowreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlowreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 no 0s --------------------------------------------')
mlowreg_rand_drop0 <- extract(fit_mreg_low_drop0, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mlowreg_rand_drop0[,1], mlowreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_low_drop0, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mlowreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlowreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
mhighreg_rand_samples <- extract(fit_mreg_high, "x_rand")[[1]]
# print(mhighreg_rand_samples)
selected_indices <- sample(1:nrow(mhighreg_rand_samples), 900)
mhighreg_rand <- mhighreg_rand_samples[selected_indices, ]
# mhighreg_rand <- extract(fit_mreg_high, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mhighreg_rand[,1], mhighreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_high, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mhighreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mhighreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

# print('---------------------------- First Pass Regression >= 0.3 no 0s --------------------------------------------')
mhighreg_rand_drop0_samples <- extract(fit_mreg_high_drop0, "x_rand")[[1]]
selected_indices <- sample(1:nrow(mhighreg_rand_drop0_samples), 900)
mhighreg_rand_drop0 <- mhighreg_rand_drop0_samples[selected_indices, ]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color
points(mhighreg_rand_drop0[,1], mhighreg_rand_drop0[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(reg_temp_high_drop0, pch=16, col="red")

# add dataEllipse with color
dataEllipse(mhighreg_rand_drop0, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mhighreg_rand_drop0, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

```


# model eye tracking to eye tracking correlation
```{r}

print("Gaze Duration")
# View(provo_eyetr_grouped_df)

egd_df = provo_eyetr_grouped_df %>% filter(metric == "gaze_duration") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
    filter(text_id != 18) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(egd_df$eyetr_value_1, egd_df$eyetr_value_2)$estimate)

# View(egd_df)

egd_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

egd_temp <- egd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(egd_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")

```

```{r, eval=FALSE}
egd_data = list(x=egd_temp, N=nrow(egd_temp))

fit_egd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=egd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_egd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egd, file = paste0("./bayesian_models/bayesian_models_correlation/cor_bivariate/eyetr_eyetr_gaze_duration_cor.rds"))

```

```{r}
print("Go Past Time")

egpt_df = provo_eyetr_grouped_df %>% filter(metric == "go_past_time") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
  filter(text_id != 18) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(egpt_df$eyetr_value_1, egpt_df$eyetr_value_2)$estimate)

# View(egd_df)

egpt_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

egpt_temp <- egpt_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(egpt_temp, pch = 16, col = "blue",
     main = "Not Log-Transformed")
```

```{r, eval=FALSE}
egpt_data = list(x=egpt_temp, N=nrow(egpt_temp))

fit_egpt = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=egpt_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_egpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egpt, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_go_past_time_cor.rds"))

```


```{r}
print("Total Duration")

etd_df = provo_eyetr_grouped_df %>% filter(metric == "total_duration") %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
  filter(text_id != 18) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1),
         eyetr_value_2 = pmax(value_2, 1)
  ) 
print(cor.test(etd_df$eyetr_value_1, etd_df$eyetr_value_2)$estimate)

# View(egd_df)

etd_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

etd_temp <- etd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(etd_temp, pch = 16, col = "blue",
     main = "Total Duration Not Log-Transformed")
```


```{r, eval=FALSE}
etd_data = list(x=etd_temp, N=nrow(etd_temp))

fit_etd = stan(
  file="stan_models/bivariate_correlation.stan", 
  data=etd_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_etd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_etd, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_total_duration_cor.rds"))
```


```{r}
print("Fisrt Pass Regression Prob.")

ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  # filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
  filter(text_id != 18) %>%
    spread(measure, value) %>%
  
  # ==== for normal data drop0s ====
  # filter(value_1 > 0, value_2 > 0) %>%
  # mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
  #        eyetr_value_2 = pmax(value_2, 1e-5)
  # )
  
  # ==== for ranking the data drop0s ====
    mutate(
    eyetr_value_1 = ifelse(value_1 > 0, rank(value_1), NA),
    eyetr_value_2 = ifelse(value_2 > 0, rank(value_2), NA)
  ) %>%
  drop_na()

  # ==== for ranking the data w/ 0s ====
  #   mutate(
  #   eyetr_value_1 = rank(value_1, ties.method = "average"),
  #   eyetr_value_2 = rank(value_2, ties.method = "average")
  # )


print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)

# View(egd_df)

ereg_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(ereg_temp, pch = 16, col = "blue",
     main = "FPReg Not Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model FPReg ----------

# View(ereg_temp)
ereg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_ereg = stan(
  file="stan_models/bivariate_correlation_rank.stan", 
  data=ereg_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99),
  verbose = FALSE
  )

# Save the model 
fit_ereg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_ereg, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_rank_drop0s.rds"))
```

```{r}
print("Skip Prob.")

eskip_df = provo_eyetr_grouped_df %>% filter(metric == "skip") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  # filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
  filter(text_id != 18) %>%
    spread(measure, value)
  
  # ==== for normal data drop0s ====
  # filter(value_1 > 0, value_2 > 0) %>%
  # mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
  #        eyetr_value_2 = pmax(value_2, 1e-5)
  # )
  
  # ==== for ranking the data drop0s ====
  #   mutate(
  #   eyetr_value_1 = ifelse(value_1 > 0, rank(value_1), NA),
  #   eyetr_value_2 = ifelse(value_2 > 0, rank(value_2), NA)
  # ) %>%
  # drop_na()

  # ==== for ranking the data w/ 0s ====
  #   mutate(
  #   eyetr_value_1 = rank(value_1, ties.method = "average"),
  #   eyetr_value_2 = rank(value_2, ties.method = "average")
  # )


print(cor.test(eskip_df$value_1, eskip_df$value_2)$estimate)


eskip_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

eskip_temp <- eskip_df[c("value_1", "value_2")] %>%
  drop_na() %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(eskip_temp, pch = 16, col = "blue",
     main = "Skip Not Log-Transformed")
```

```{r, eval=FALSE}
# -------fit model Skip ----------

# View(ereg_temp)
eskip_data = list(x=eskip_temp, N=nrow(eskip_temp))
fit_eskip = stan(
  file="stan_models/bivariate_normal_reg.stan", 
  data=eskip_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99),
  verbose = FALSE
  )

# Save the model 
fit_eskip@stanmodel@dso <- new("cxxdso")
saveRDS(fit_eskip, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_skip_cor_drop0s.rds"))
```



```{r}
fit_egd = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_gaze_duration_cor_drop0s.rds")
fit_egpt = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_go_past_time_cor_drop0s.rds")
fit_etd = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_total_duration_cor_drop0s.rds")
fit_ereg = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_drop0s.rds")
fit_eskip = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_skip_cor_drop0s.rds")
print('---------------------------- Gaze Duration--------------------------------------------')
print(fit_egd)
print('---------------------------- Go Past Time--------------------------------------------')
print(fit_egpt)
print('---------------------------- Total Duration--------------------------------------------')
print(fit_etd)
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
print(fit_ereg)
print('---------------------------- Skip Prob.--------------------------------------------')
print(fit_eskip)
```


```{r, eval=FALSE}
# stan_trace(fit_egd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_egd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_egd, pars=c("rho", "mu", "sigma", "nu"))

# Gaze Duration
stan_trace(fit_egd)
stan_dens(fit_egd, separate_chains = TRUE)
stan_plot(fit_egd)

# Go Past Time
stan_trace(fit_egpt)
stan_dens(fit_egpt, separate_chains = TRUE)
stan_plot(fit_egpt)

# Total Duration
stan_trace(fit_etd)
stan_dens(fit_etd, separate_chains = TRUE)
stan_plot(fit_etd)

# FPReg
stan_trace(fit_ereg)
stan_dens(fit_ereg, separate_chains = TRUE)
stan_plot(fit_ereg)
```


```{r}
print('---------------------------- Gaze Duration--------------------------------------------')
rho_egd = as.numeric(extract(fit_egd, "rho")[[1]])
mean = mean(rho_egd)
crI = quantile(rho_egd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_egd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Go Past Time--------------------------------------------')
rho_egpt = as.numeric(extract(fit_egpt, "rho")[[1]])
mean = mean(rho_egpt)
crI = quantile(rho_egpt, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_egpt), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Total Duration--------------------------------------------')
rho_etd = as.numeric(extract(fit_etd, "rho")[[1]])
mean = mean(rho_etd)
crI = quantile(rho_etd, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_etd), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- First Pass Regression --------------------------------------------')
rho_ereg = as.numeric(extract(fit_ereg, "rho")[[1]])
mean = mean(rho_ereg)
crI = quantile(rho_ereg, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- Skip --------------------------------------------')
rho_eskip = as.numeric(extract(fit_eskip, "rho")[[1]])
mean = mean(rho_eskip)
crI = quantile(rho_eskip, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_eskip), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
```


```{r, eval=FALSE}
print('---------------------------- Gaze Duration--------------------------------------------')
egd_rand <- extract(fit_egd, "x_rand")[[1]]
# x_rand_filtered <- x_rand[apply(x_rand, 1, function(x) all(x > 0)),]
# x_rand_filtered

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(egd_rand[,1], egd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(egd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(egd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(egd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Go Past Time--------------------------------------------')
egpt_rand <- extract(fit_egpt, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Go Past Time") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(egpt_rand[,1], egpt_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(egpt_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(egpt_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(egpt_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- Total Duration--------------------------------------------')
etd_rand <- extract(fit_etd, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Total Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(etd_rand[,1], etd_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(etd_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(etd_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(etd_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression --------------------------------------------')
ereg_rand <- extract(fit_ereg, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "First Pass Regression") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(ereg_rand[,1], ereg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(ereg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(ereg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
```


# Bayesian -- correlation between MoTR and word level statistics
```{r}
print("Log Frequency")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$freq)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$freq)$estimate)

# View(stats_cor_df)
stats_cor_df %>% 
  gather(measure, value, c(7, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

mfreq_temp <- stats_cor_df[c("motr_value", "freq")] %>%
  data.matrix()
efreq_temp <- stats_cor_df[c("eyetr_value", "freq")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(mfreq_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Word Frequency")

# Plot the first data matrix gd_temp
plot(efreq_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Word Frequency")

```
# motr & frequency
```{r, eval=FALSE}
mfreq_data = list(x=mfreq_temp, N=nrow(mfreq_temp))

fit_mfreq = stan(
  file="stan_models/stats_correlation.stan", 
  data=mfreq_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_mfreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mfreq, file = paste0("./bayesian_models/bayesian_models_correlation/motr_freq_cor.rds"))
```

# eyetr & frequency
```{r, eval=FALSE}
efreq_data = list(x=efreq_temp, N=nrow(efreq_temp))

fit_efreq = stan(
  file="stan_models/stats_correlation.stan", 
  data=efreq_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_efreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_efreq, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_freq_cor.rds"))
```

```{r}
print("Length")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$len)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$len)$estimate)

# View(stats_cor_df)
stats_cor_df %>% 
  gather(measure, value, c(9, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

mlen_temp <- stats_cor_df[c("motr_value", "len")] %>%
  data.matrix()
elen_temp <- stats_cor_df[c("eyetr_value", "len")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(mlen_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Word Length")

# Plot the first data matrix gd_temp
plot(elen_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Word Length")
```

# motr & length
```{r, eval=FALSE}
mlen_data = list(x=mlen_temp, N=nrow(mlen_temp))

fit_mlen = stan(
  file="stan_models/stats_correlation_len_normal.stan", 
  data=mlen_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_mlen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mlen, file = paste0("./bayesian_models/bayesian_models_correlation/motr_len_cor.rds"))

```

# eyetr & length
```{r, eval=FALSE}
elen_data = list(x=elen_temp, N=nrow(elen_temp))

fit_elen = stan(
  file="stan_models/stats_correlation_len_normal.stan", 
  data=elen_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_elen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_elen, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_len_cor.rds"))
```


```{r}
print("Surprisal")
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$surp)$estimate)
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$surp)$estimate)

# View(stats_cor_df)
stats_cor_df %>% 
  gather(measure, value, c(8, 13)) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

msurp_temp <- stats_cor_df[c("motr_value", "surp")] %>%
  data.matrix()
esurp_temp <- stats_cor_df[c("eyetr_value", "surp")] %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Plot the first data matrix gd_temp
plot(msurp_temp, pch = 16, col = "blue",
     main = "MoTR RTs and Surprisal")

# Plot the first data matrix gd_temp
plot(esurp_temp, pch = 16, col = "blue",
     main = "EyeTR RTs and Surprisal")
```
# motr & surprisal
```{r, eval=FALSE}
msurp_data = list(x=msurp_temp, N=nrow(msurp_temp))

fit_msurp = stan(
  file="stan_models/stats_correlation.stan", 
  data=msurp_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_msurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_msurp, file = paste0("./bayesian_models/bayesian_models_correlation/motr_surp_cor.rds"))

```

# eyetr & surprisal
```{r, eval=FALSE}
esurp_data = list(x=esurp_temp, N=nrow(esurp_temp))

fit_esurp = stan(
  file="stan_models/stats_correlation.stan", 
  data=esurp_data, 
  iter=4000, 
  chains=4, 
  cores=8,
  seed=444,
  # control=list(adapt_delta=0.99), 
  # verbose = FALSE
  )

# Save the model 
fit_esurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_esurp, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_surp_cor.rds"))

```


```{r}
fit_mfreq = readRDS("./bayesian_models/bayesian_models_correlation/motr_freq_cor.rds")
fit_efreq = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_freq_cor.rds")
fit_mlen = readRDS("./bayesian_models/bayesian_models_correlation/motr_len_cor.rds")
fit_elen = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_len_cor.rds")
fit_msurp = readRDS("./bayesian_models/bayesian_models_correlation/motr_surp_cor.rds")
fit_esurp = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_surp_cor.rds")

print('---------------------------- MoTR & Log Frequency --------------------------------------------')
print(fit_mfreq)
print('---------------------------- EyeTR & Log Frequency --------------------------------------------')
print(fit_efreq)
print('---------------------------- MoTR & Length --------------------------------------------')
print(fit_mlen)
print('---------------------------- EyeTR & Length --------------------------------------------')
print(fit_elen)
print('---------------------------- MoTR & Surprisal --------------------------------------------')
print(fit_msurp)
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
print(fit_esurp)

```

```{r, eval=FALSE}
# MoTR & Log Freq
stan_trace(fit_mfreq)
stan_dens(fit_mfreq, separate_chains = TRUE)
stan_plot(fit_mfreq)

# EyeTR & Log Freq
stan_trace(fit_efreq)
stan_dens(fit_efreq, separate_chains = TRUE)
stan_plot(fit_efreq)

# MoTR & Len
stan_trace(fit_mlen)
stan_dens(fit_mlen, separate_chains = TRUE)
stan_plot(fit_mlen)

# EyeTR & Len
stan_trace(fit_elen)
stan_dens(fit_elen, separate_chains = TRUE)
stan_plot(fit_elen)

# MoTR & Surprisal
stan_trace(fit_msurp)
stan_dens(fit_msurp, separate_chains = TRUE)
stan_plot(fit_msurp)

# EyeTR & Surprisal
stan_trace(fit_esurp)
stan_dens(fit_esurp, separate_chains = TRUE)
stan_plot(fit_esurp)

```


```{r}
print('---------------------------- MoTR & Log Freq --------------------------------------------')
rho_mfreq = as.numeric(extract(fit_mfreq, "rho")[[1]])
mean = mean(rho_mfreq)
crI = quantile(rho_mfreq, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mfreq), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- EyeTR & Log Freq --------------------------------------------')
rho_efreq = as.numeric(extract(fit_efreq, "rho")[[1]])
mean = mean(rho_efreq)
crI = quantile(rho_efreq, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_efreq), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- MoTR & Length --------------------------------------------')
rho_mlen = as.numeric(extract(fit_mlen, "rho")[[1]])
mean = mean(rho_mlen)
crI = quantile(rho_mlen, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_mlen), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- EyeTR & Length --------------------------------------------')
rho_elen = as.numeric(extract(fit_elen, "rho")[[1]])
mean = mean(rho_elen)
crI = quantile(rho_elen, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_elen), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- MoTR & Surprisal --------------------------------------------')
rho_msurp = as.numeric(extract(fit_msurp, "rho")[[1]])
mean = mean(rho_msurp)
crI = quantile(rho_msurp, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_msurp), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")

print('---------------------------- EyeTR & Surprisal --------------------------------------------')
rho_esurp = as.numeric(extract(fit_esurp, "rho")[[1]])
mean = mean(rho_esurp)
crI = quantile(rho_esurp, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_esurp), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")

```



```{r,eval=FALSE}
print('---------------------------- MoTR & Log Frequency--------------------------------------------')
mfreq_rand <- extract(fit_mfreq, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 12), type="n",
     xlab = "MoTR value", ylab = "Log Frequency", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mfreq_rand[,1], mfreq_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(mfreq_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mfreq_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mfreq_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- EyeTR & Log Frequency--------------------------------------------')
efreq_rand <- extract(fit_efreq, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 500), ylim=c(0, 12), type="n",
     xlab = "Eye tracking value", ylab = "Log Frequency", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(efreq_rand[,1], efreq_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(efreq_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(efreq_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(efreq_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- MoTR & Length --------------------------------------------')
mlen_rand <- extract(fit_mlen, "x_rand")[[1]]
# mlen_rand

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "MoTR value", ylab = "Word Length", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(mlen_rand[,1], mlen_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(mlen_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(mlen_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mlen_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- EyeTR & Length --------------------------------------------')
elen_rand <- extract(fit_elen, "x_rand")[[1]]
# elen_rand

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "EyeTR value", ylab = "Word Length", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(elen_rand[,1], elen_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(elen_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(elen_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(elen_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- MoTR & Surprisal --------------------------------------------')
msurp_rand <- extract(fit_msurp, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "MoTR value", ylab = "Word Surprisal", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(msurp_rand [,1], msurp_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(msurp_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(msurp_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(msurp_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- EyeTR & Surprisal --------------------------------------------')
esurp_rand <- extract(fit_esurp, "x_rand")[[1]]

# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
     xlab = "EyeTR value", ylab = "Word Surprisal", main = "Gaze Duration") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(esurp_rand [,1], esurp_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(esurp_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(esurp_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(esurp_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

```


```{r}
print("EyeTR vs. EyeTR Fisrt Pass Regression Prob. < 0.3 ")

ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5))

ereg_df_low = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5)) %>%
  filter(eyetr_value_1 < 0.3)
# View(ereg_df_low)

ereg_df_high = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
  # summarize(value = mean(value)) %>%
  filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
    spread(measure, value) %>%
  # smoothing, if includes 0s
  mutate(eyetr_value_1 =  pmax(value_1, 1e-5),
         eyetr_value_2 = pmax(value_2, 1e-5)) %>%
  filter(eyetr_value_1 >= 0.3)
# View(ereg_df_high) 

print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$p.value)
print(cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)$estimate)
print(cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)$p.value)
print(cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)$estimate)
print(cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)$p.value)

# View(egd_df)

ereg_df %>% 
  gather(measure, value, 5:6) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")

ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()
ereg_temp_low <- ereg_df_low[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()
ereg_temp_high <- ereg_df_high[c("eyetr_value_1", "eyetr_value_2")] %>%
  drop_na() %>%
  data.matrix()

# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 3))
# Plot the first data matrix gd_temp
plot(ereg_temp, pch = 16, col = "blue",
     main = "FPReg all data Not Log-Transformed")
plot(ereg_temp_low, pch = 16, col = "blue",
     main = "FPReg < 0.3 Not Log-Transformed")
plot(ereg_temp_high, pch = 16, col = "blue",
     main = "FPReg > 0.3 Not Log-Transformed")
```


```{r, eval=FALSE}
# -------fit model eyetr vs. eyetr FPReg <0.3 & >=0.3 ----------
reg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_reg = stan(
  # file="stan_models/bivariate_beta_correlation_reg.stan", 
  file = "stan_models/bivariate_normal_reg.stan",
  data=reg_data, 
  iter=4000, 
  chains=4, 
  cores=4,
  seed=444,
  # control=list(adapt_delta=0.99), 
  verbose = FALSE
  )

# Save the model 
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_all_data.rds"))
```



# exploratory: divide eye tracking regression data into two parts.
```{r}
# fit_ereg_all = readRDS("./eyetr_eyetr_FPReg_cor_all_data.rds")
fit_ereg_all = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor.rds")
fit_ereg_low = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_00-03.rds")
fit_ereg_high = readRDS("./bayesian_models/bayesian_models_correlation/eyetr_eyetr_FPReg_cor_03-1.rds")

print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
print(fit_ereg_all)
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
print(fit_ereg_low)
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
print(fit_ereg_high)

```


```{r}
# # FPReg all data
stan_trace(fit_ereg_all)
stan_dens(fit_ereg_all, separate_chains = TRUE)
stan_plot(fit_ereg_all)

# # FPReg < 0.3
stan_trace(fit_ereg_low)
stan_dens(fit_ereg_low, separate_chains = TRUE)
stan_plot(fit_ereg_low)

# FPReg >= 0.3
stan_trace(fit_ereg_high)
stan_dens(fit_ereg_high, separate_chains = TRUE)
stan_plot(fit_ereg_high)
```

```{r}
print('---------------------------- First Pass Regression all data--------------------------------------------')
rho_ereg_all = as.numeric(extract(fit_ereg_all, "rho")[[1]])
mean = mean(rho_ereg_all)
crI = quantile(rho_ereg_all, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_all), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")


print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
rho_ereg_low = as.numeric(extract(fit_ereg_low, "rho")[[1]])
mean = mean(rho_ereg_low)
crI = quantile(rho_ereg_low, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_low), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")


print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
rho_ereg_high = as.numeric(extract(fit_ereg_high, "rho")[[1]])
mean = mean(rho_ereg_high)
crI = quantile(rho_ereg_high, c(.025, .975))
hpd99 = HPDinterval(as.mcmc(rho_ereg_high), prob=0.95)
cat("Mean: ", mean, "\nHPD: [", hpd99[,"lower"], ", ", hpd99[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
```

```{r}
print('---------------------------- First Pass Regression all data --------------------------------------------')
eallreg_rand <- extract(fit_ereg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(eallreg_rand[,1], eallreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(eallreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(eallreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
elowreg_rand <- extract(fit_ereg_low, "x_rand")[[1]]
# print(elowreg_rand)
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(elowreg_rand[,1], elowreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp_low, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(elowreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(elowreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
ehighreg_rand_samples <- extract(fit_ereg_high, "x_rand")[[1]]
# print(mhighreg_rand_samples)
selected_indices <- sample(1:nrow(ehighreg_rand_samples), 900)
ehighreg_rand <- ehighreg_rand_samples[selected_indices, ]
# mhighreg_rand <- extract(fit_mreg_high, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank

# add points for x_rand with color 
points(ehighreg_rand[,1], ehighreg_rand[,2], col = "black", pch = 16)
# add points for gd_temp with color red
points(ereg_temp_high, pch=16, col="red")

# add dataEllipse with color 
dataEllipse(ehighreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(ehighreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")

```
